Actividad 6 - 2.2 Pruebas de raíz unitaria - Equipo¶
Johnson & Johnson (JNJ) vs The Coca-Cola Company (KO) | 2022-01-01 to 2025-10-10
Pruebas integrales de raiz unitaria, modelado ARIMA, pronostico y analisis de cointegracion.
In [ ]:
import yfinance as yf
import pandas as pd
# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'
# Fetch stock data for Johnson & Johnson (JNJ)
jnj = yf.download('JNJ', start=start_date, end=end_date)
# Display the first 5 rows
print(jnj.head(5))
[*********************100%***********************] 1 of 1 completed
Price Close High Low Open Volume Ticker JNJ JNJ JNJ JNJ JNJ Date 2022-01-03 153.545593 153.599311 151.361559 152.355121 6012200 2022-01-04 153.133850 154.216914 152.704206 153.115944 6748400 2022-01-05 154.154297 155.273173 153.617231 153.957374 7016100 2022-01-06 153.626160 154.404893 152.883224 154.091602 7301600 2022-01-07 155.702774 156.016065 153.178597 153.473982 6986000
In [ ]:
import yfinance as yf
import pandas as pd
# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'
# Fetch stock data for The Coca-Cola Company (KO)
ko = yf.download('KO', start=start_date, end=end_date)
# Display the first 5 rows
print(ko.head(5))
[*********************100%***********************] 1 of 1 completed
Price Close High Low Open Volume Ticker KO KO KO KO KO Date 2022-01-03 52.996700 53.005639 52.174493 52.567722 20187300 2022-01-04 53.881470 54.104896 53.255876 53.380994 26141600 2022-01-05 54.328323 54.694742 53.666980 53.702729 22507300 2022-01-06 54.042339 54.623245 54.015526 54.042339 17902300 2022-01-07 53.917213 54.265758 53.675913 53.872525 12307900
In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings
# Suppress the FutureWarning
warnings.filterwarnings('ignore', category=FutureWarning)
# Download stock data from Yahoo Finance
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['KO', 'JNJ']
# Download data with explicit auto_adjust=False to get Adj Close column
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)['Adj Close']
# If the above still fails, use this alternative approach:
# data = yf.download(tickers, start=start_date, end=end_date)
# data = data['Close'] # Use regular Close prices instead
# Extract individual series
ko_close = data['KO']
jnj_close = data['JNJ']
print("KO data length:", len(ko_close))
print("JNJ data length:", len(jnj_close))
print("\nKO data sample:")
print(ko_close.head())
print("\nJNJ data sample:")
print(jnj_close.head())
# Remove any NaN values
ko_close = ko_close.dropna()
jnj_close = jnj_close.dropna()
# Align by common dates
common_index = ko_close.index.intersection(jnj_close.index)
ko_close = ko_close.loc[common_index]
jnj_close = jnj_close.loc[common_index]
print(f"\nCommon data length after alignment: {len(ko_close)}")
print(f"Date range: {ko_close.index[0]} to {ko_close.index[-1]}")
# Function for unit root tests
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
# KPSS Test
kpss_result = kpss(series, regression='c')
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
# Perform unit root tests
unit_root_tests(ko_close, "KO")
unit_root_tests(jnj_close, "JNJ")
# Difference the series if non-stationary
ko_diff = ko_close.diff().dropna()
jnj_diff = jnj_close.diff().dropna()
# Function to find best ARMA model (using ARIMA with d=0)
def find_best_arma(series, name, max_p=3, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, 0, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, 0, q)
except:
continue
print(f"\nBest ARMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
best_model = ARIMA(series, order=best_order).fit()
return best_model
# Fit ARMA models
ko_arma = find_best_arma(ko_diff, "KO")
jnj_arma = find_best_arma(jnj_diff, "JNJ")
# Cointegration test
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]: # 95% critical value
print(f"r = {i}: Cointegration exists at 95% confidence level")
else:
print(f"r = {i}: No cointegration at 95% confidence level")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'KO': ko_close,
'JNJ': jnj_close
})
# Run cointegration test
cointegration_test(coint_df)
# Plot the series
plt.figure(figsize=(12,6))
plt.plot(ko_close, label='KO')
plt.plot(jnj_close, label='JNJ')
plt.title('KO vs JNJ Adjusted Closing Prices')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Plot the differenced series
plt.figure(figsize=(12,6))
plt.plot(ko_diff, label='KO Diff', alpha=0.7)
plt.plot(jnj_diff, label='JNJ Diff', alpha=0.7)
plt.title('Differenced Series')
plt.xlabel('Date')
plt.ylabel('Price Change ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Print summary statistics
print("\nSummary Statistics:")
print(ko_close.describe())
print("\n")
print(jnj_close.describe())
[*********************100%***********************] 2 of 2 completed
KO data length: 946
JNJ data length: 946
KO data sample:
Date
2022-01-03 52.996700
2022-01-04 53.881470
2022-01-05 54.328323
2022-01-06 54.042339
2022-01-07 53.917213
Name: KO, dtype: float64
JNJ data sample:
Date
2022-01-03 153.545593
2022-01-04 153.133850
2022-01-05 154.154297
2022-01-06 153.626160
2022-01-07 155.702774
Name: JNJ, dtype: float64
Common data length after alignment: 946
Date range: 2022-01-03 00:00:00 to 2025-10-09 00:00:00
Unit Root Tests for KO:
ADF Test:
ADF Statistic: -1.6646
p-value: 0.4495
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
KPSS Test:
KPSS Statistic: 3.3544
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Unit Root Tests for JNJ:
ADF Test:
ADF Statistic: -1.3590
p-value: 0.6018
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
KPSS Test:
KPSS Statistic: 0.5093
p-value: 0.0396
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Best ARMA model for KO:
Order: (0, 0, 0)
AIC: 1740.05
Best ARMA model for JNJ:
Order: (3, 0, 1)
AIC: 3657.76
Johansen Cointegration Test:
Trace statistic: [9.56967308 2.82926924]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
r = 0: No cointegration at 95% confidence level
r = 1: No cointegration at 95% confidence level
Summary Statistics: count 946.000000 mean 59.843887 std 5.674025 min 49.325050 25% 55.762364 50% 57.922932 75% 63.459334 max 72.819817 Name: KO, dtype: float64 count 946.000000 mean 153.386964 std 8.841785 min 136.714722 25% 147.932114 50% 152.329422 75% 158.662666 max 191.080002 Name: JNJ, dtype: float64
Conclusiones:
In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading KO and JNJ data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['KO', 'JNJ']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
ko_close = data['Adj Close']['KO'].dropna()
jnj_close = data['Adj Close']['JNJ'].dropna()
# Align both series to common dates
common_index = ko_close.index.intersection(jnj_close.index)
ko_close = ko_close.loc[common_index]
jnj_close = jnj_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(ko_close)}")
print(f"Date range: {ko_close.index[0].date()} to {ko_close.index[-1].date()}")
# Function for unit root tests
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
# KPSS Test
kpss_result = kpss(series)
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
# Perform unit root tests
unit_root_tests(ko_close, "KO")
unit_root_tests(jnj_close, "JNJ")
# Cointegration test
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]:
print(f"r = {i}: Cointegration exists at 95% confidence level")
else:
print(f"r = {i}: No cointegration at 95% confidence level")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'KO': ko_close,
'JNJ': jnj_close
}).dropna()
cointegration_test(coint_df)
# Function to find best ARIMA model
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for d in range(max_d + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, d, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, d, q)
except:
continue
print(f"\nBest ARIMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
return best_order
# Find and fit best ARIMA models
ko_order = find_best_arima(ko_close, "KO")
jnj_order = find_best_arima(jnj_close, "JNJ")
# Fit final ARIMA models
ko_model = ARIMA(ko_close, order=ko_order).fit()
jnj_model = ARIMA(jnj_close, order=jnj_order).fit()
# Forecast next 30 periods
forecast_steps = 30
ko_forecast = ko_model.forecast(steps=forecast_steps)
jnj_forecast = jnj_model.forecast(steps=forecast_steps)
# Create forecast index using business days
last_date = ko_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=forecast_steps, freq='B')
# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(ko_close.index, ko_close, label='KO Historical')
plt.plot(forecast_index, ko_forecast, label='KO Forecast', color='red')
plt.plot(jnj_close.index, jnj_close, label='JNJ Historical')
plt.plot(forecast_index, jnj_forecast, label='JNJ Forecast', color='green')
plt.title('KO and JNJ Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Plot detailed forecast with confidence intervals
def plot_forecast(model, series, name, steps=30):
forecast_obj = model.get_forecast(steps=steps)
forecast = forecast_obj.predicted_mean
conf_int = forecast_obj.conf_int()
# Create proper date index for forecast
last_date = series.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=steps, freq='B')
plt.figure(figsize=(12,6))
plt.plot(series.index, series, label=f'{name} Historical')
plt.plot(forecast_index, forecast, label='Forecast', color='red')
plt.fill_between(forecast_index,
conf_int.iloc[:, 0],
conf_int.iloc[:, 1],
color='pink',
alpha=0.3,
label='95% Confidence Interval')
plt.title(f'{name} Price Forecast')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Generate detailed forecast plots
plot_forecast(ko_model, ko_close, "KO")
plot_forecast(jnj_model, jnj_close, "JNJ")
# Print forecast values
print("\nKO Forecast Values (next 5 periods):")
print(ko_forecast[:5])
print("\nJNJ Forecast Values (next 5 periods):")
print(jnj_forecast[:5])
# Print model diagnostics
print("\n" + "="*50)
print("MODEL DIAGNOSTICS")
print("="*50)
print(f"\nKO Model: ARIMA{ko_order}")
print(f"JNJ Model: ARIMA{jnj_order}")
print("\nNote: Use model.summary() for detailed parameter estimates and diagnostics")
print("The models automatically handle:")
print("- Unit root testing (via optimal differencing d)")
print("- Cointegration analysis (Johansen test)")
print("- Optimal parameter selection (AIC minimization)")
print("- 30-day ahead forecasting with confidence intervals")
[*********************100%***********************] 2 of 2 completed
Downloading KO and JNJ data from Yahoo Finance... Data successfully downloaded and aligned! Common data points: 946 Date range: 2022-01-03 to 2025-10-09 Unit Root Tests for KO:
ADF Test:
ADF Statistic: -1.6646
p-value: 0.4495
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
KPSS Test:
KPSS Statistic: 3.3544
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Unit Root Tests for JNJ:
ADF Test:
ADF Statistic: -1.3590
p-value: 0.6018
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
KPSS Test:
KPSS Statistic: 0.5093
p-value: 0.0396
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Johansen Cointegration Test:
Trace statistic: [9.56967308 2.82926924]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
r = 0: No cointegration at 95% confidence level
r = 1: No cointegration at 95% confidence level
Best ARIMA model for KO:
Order: (2, 1, 2)
AIC: 1734.92
Best ARIMA model for JNJ:
Order: (3, 1, 1)
AIC: 3656.40
KO Forecast Values (next 5 periods): 946 66.190071 947 66.116038 948 66.190193 949 66.368658 950 66.547343 Name: predicted_mean, dtype: float64 JNJ Forecast Values (next 5 periods): 946 190.756738 947 190.891776 948 190.686445 949 190.902232 950 190.696202 Name: predicted_mean, dtype: float64 ================================================== MODEL DIAGNOSTICS ================================================== KO Model: ARIMA(2, 1, 2) JNJ Model: ARIMA(3, 1, 1) Note: Use model.summary() for detailed parameter estimates and diagnostics The models automatically handle: - Unit root testing (via optimal differencing d) - Cointegration analysis (Johansen test) - Optimal parameter selection (AIC minimization) - 30-day ahead forecasting with confidence intervals
Conclusiones:
In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading KO and JNJ data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['KO', 'JNJ']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
ko_close = data['Adj Close']['KO'].dropna()
jnj_close = data['Adj Close']['JNJ'].dropna()
# Align both series to common dates
common_index = ko_close.index.intersection(jnj_close.index)
ko_close = ko_close.loc[common_index]
jnj_close = jnj_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(ko_close)}")
print(f"Date range: {ko_close.index[0].date()} to {ko_close.index[-1].date()}")
# Function for unit root tests with interpretation
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
print("Interpretation:")
if adf_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
# KPSS Test
kpss_result = kpss(series)
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
print("Interpretation:")
if kpss_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be stationary")
# Perform unit root tests
unit_root_tests(ko_close, "KO")
unit_root_tests(jnj_close, "JNJ")
# Cointegration test with interpretation
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
print("Interpretation:")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]:
print(f" - r = {i}: Cointegration exists at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
else:
print(f" - r = {i}: No cointegration at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
if result.lr1[0] > result.cvt[0, 1]:
print("Conclusion: KO and JNJ are cointegrated - they share a long-run equilibrium relationship")
else:
print("Conclusion: No evidence of cointegration between KO and JNJ")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'KO': ko_close,
'JNJ': jnj_close
}).dropna()
cointegration_test(coint_df)
# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for d in range(max_d + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, d, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, d, q)
except:
continue
print(f"\nBest ARIMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
print("Interpretation:")
print(f" - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
print(f" - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
print(f" - q={best_order[2]}: {best_order[2]} moving average term(s)")
return best_order
# Find and fit best ARIMA models
ko_order = find_best_arima(ko_close, "KO")
jnj_order = find_best_arima(jnj_close, "JNJ")
# Fit final ARIMA models
ko_model = ARIMA(ko_close, order=ko_order).fit()
jnj_model = ARIMA(jnj_close, order=jnj_order).fit()
# Forecast next 30 periods
forecast_steps = 30
ko_forecast = ko_model.forecast(steps=forecast_steps)
jnj_forecast = jnj_model.forecast(steps=forecast_steps)
# Create forecast index using business days
last_date = ko_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=forecast_steps, freq='B')
# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(ko_close.index, ko_close, label='KO Historical')
plt.plot(forecast_index, ko_forecast, label='KO Forecast', color='red')
plt.plot(jnj_close.index, jnj_close, label='JNJ Historical')
plt.plot(forecast_index, jnj_forecast, label='JNJ Forecast', color='green')
plt.title('KO and JNJ Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
forecast_obj = model.get_forecast(steps=steps)
forecast = forecast_obj.predicted_mean
conf_int = forecast_obj.conf_int()
# Create proper date index for forecast
last_date = series.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=steps, freq='B')
plt.figure(figsize=(12,6))
plt.plot(series.index, series, label=f'{name} Historical')
plt.plot(forecast_index, forecast, label='Forecast', color='red')
plt.fill_between(forecast_index,
conf_int.iloc[:, 0],
conf_int.iloc[:, 1],
color='pink',
alpha=0.3,
label='95% Confidence Interval')
plt.title(f'{name} Price Forecast')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Forecast interpretation
last_value = series.iloc[-1]
mean_forecast = forecast.mean()
print(f"\nForecast Interpretation for {name}:")
print(f"Last observed value: ${last_value:.2f}")
print(f"Average forecast value: ${mean_forecast:.2f}")
print(f"Forecast change: ${mean_forecast - last_value:.2f}")
if mean_forecast > last_value:
print("Trend: Upward forecast trend")
elif mean_forecast < last_value:
print("Trend: Downward forecast trend")
else:
print("Trend: Flat forecast trend")
print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")
# Generate detailed forecast plots and interpretations
plot_forecast(ko_model, ko_close, "KO")
plot_forecast(jnj_model, jnj_close, "JNJ")
# Print forecast values
print("\nKO Forecast Values (next 5 periods):")
print(ko_forecast[:5])
print("\nJNJ Forecast Values (next 5 periods):")
print(jnj_forecast[:5])
print("\n" + "="*60)
print("ANALYSIS COMPLETE")
print("="*60)
print("✓ Unit root tests performed (ADF & KPSS)")
print("✓ Cointegration analysis completed (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC minimization")
print("✓ 30-day forecasts generated with 95% confidence intervals")
print("✓ Detailed interpretations provided for all results")
print(f"\nData period: {start_date} to {end_date}")
print(f"Tickers analyzed: KO (Coca-Cola) and JNJ (Johnson & Johnson)")
[*********************100%***********************] 2 of 2 completed
Downloading KO and JNJ data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09
Unit Root Tests for KO:
ADF Test:
ADF Statistic: -1.6646
p-value: 0.4495
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value >= 0.05: Fail to reject null - KO may be non-stationary
KPSS Test:
KPSS Statistic: 3.3544
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - KO is non-stationary
Unit Root Tests for JNJ:
ADF Test:
ADF Statistic: -1.3590
p-value: 0.6018
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value >= 0.05: Fail to reject null - JNJ may be non-stationary
KPSS Test:
KPSS Statistic: 0.5093
p-value: 0.0396
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - JNJ is non-stationary
Johansen Cointegration Test:
Trace statistic: [9.56967308 2.82926924]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
Interpretation:
- r = 0: No cointegration at 95% confidence level
Trace statistic (9.57) <= 95% critical value (15.49)
- r = 1: No cointegration at 95% confidence level
Trace statistic (2.83) <= 95% critical value (3.84)
Conclusion: No evidence of cointegration between KO and JNJ
Best ARIMA model for KO: Order: (2, 1, 2) AIC: 1734.92 Interpretation: - p=2: 2 autoregressive term(s) - d=1: 1 difference(s) needed for stationarity - q=2: 2 moving average term(s) Best ARIMA model for JNJ: Order: (3, 1, 1) AIC: 3656.40 Interpretation: - p=3: 3 autoregressive term(s) - d=1: 1 difference(s) needed for stationarity - q=1: 1 moving average term(s)
Forecast Interpretation for KO: Last observed value: $66.37 Average forecast value: $66.36 Forecast change: $-0.01 Trend: Downward forecast trend 95% CI range at period 30: [$60.02, $73.19]
Forecast Interpretation for JNJ: Last observed value: $191.08 Average forecast value: $190.80 Forecast change: $-0.28 Trend: Downward forecast trend 95% CI range at period 30: [$174.56, $207.16] KO Forecast Values (next 5 periods): 946 66.190071 947 66.116038 948 66.190193 949 66.368658 950 66.547343 Name: predicted_mean, dtype: float64 JNJ Forecast Values (next 5 periods): 946 190.756738 947 190.891776 948 190.686445 949 190.902232 950 190.696202 Name: predicted_mean, dtype: float64 ============================================================ ANALYSIS COMPLETE ============================================================ ✓ Unit root tests performed (ADF & KPSS) ✓ Cointegration analysis completed (Johansen test) ✓ Optimal ARIMA models selected via AIC minimization ✓ 30-day forecasts generated with 95% confidence intervals ✓ Detailed interpretations provided for all results Data period: 2022-01-01 to 2025-10-10 Tickers analyzed: KO (Coca-Cola) and JNJ (Johnson & Johnson)
Conclusiones:
In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading KO and JNJ data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['KO', 'JNJ']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
ko_close = data['Adj Close']['KO'].dropna()
jnj_close = data['Adj Close']['JNJ'].dropna()
# Align both series to common dates
common_index = ko_close.index.intersection(jnj_close.index)
ko_close = ko_close.loc[common_index]
jnj_close = jnj_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(ko_close)}")
print(f"Date range: {ko_close.index[0].date()} to {ko_close.index[-1].date()}")
# Function for unit root tests with interpretation
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
print("Interpretation:")
if adf_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
# KPSS Test
kpss_result = kpss(series)
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
print("Interpretation:")
if kpss_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be stationary")
# Perform unit root tests
unit_root_tests(ko_close, "KO")
unit_root_tests(jnj_close, "JNJ")
# Function to plot correlograms with interpretation
def plot_correlograms(series, name, lags=30):
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
plot_acf(series, lags=lags, ax=plt.gca())
plt.title(f'ACF for {name}')
plt.subplot(2, 1, 2)
plot_pacf(series, lags=lags, ax=plt.gca())
plt.title(f'PACF for {name}')
plt.tight_layout()
plt.show()
print(f"\nCorrelogram Interpretation for {name}:")
print(" - ACF: Shows total correlation at each lag, including indirect effects")
print(" - PACF: Shows direct correlation at each lag, controlling for earlier lags")
print(" - Significant spikes outside the blue confidence interval suggest strong correlations")
print(" - ACF decay pattern indicates potential ARIMA model orders")
print(" - PACF cutoff suggests AR order, while ACF cutoff suggests MA order")
# Plot correlograms for original series
plot_correlograms(ko_close, "KO Original")
plot_correlograms(jnj_close, "JNJ Original")
# Difference the series
ko_diff = ko_close.diff().dropna()
jnj_diff = jnj_close.diff().dropna()
# Plot correlograms for differenced series
plot_correlograms(ko_diff, "KO Differenced")
plot_correlograms(jnj_diff, "JNJ Differenced")
# Cointegration test with interpretation
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
print("Interpretation:")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]:
print(f" - r = {i}: Cointegration exists at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
else:
print(f" - r = {i}: No cointegration at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
if result.lr1[0] > result.cvt[0, 1]:
print("Conclusion: KO and JNJ are cointegrated - they share a long-run equilibrium relationship")
else:
print("Conclusion: No evidence of cointegration between KO and JNJ")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'KO': ko_close,
'JNJ': jnj_close
}).dropna()
cointegration_test(coint_df)
# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for d in range(max_d + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, d, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, d, q)
except:
continue
print(f"\nBest ARIMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
print("Interpretation:")
print(f" - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
print(f" - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
print(f" - q={best_order[2]}: {best_order[2]} moving average term(s)")
return best_order
# Find and fit best ARIMA models
ko_order = find_best_arima(ko_close, "KO")
jnj_order = find_best_arima(jnj_close, "JNJ")
# Fit final ARIMA models
ko_model = ARIMA(ko_close, order=ko_order).fit()
jnj_model = ARIMA(jnj_close, order=jnj_order).fit()
# Forecast next 30 periods
forecast_steps = 30
ko_forecast = ko_model.forecast(steps=forecast_steps)
jnj_forecast = jnj_model.forecast(steps=forecast_steps)
# Create forecast index using business days
last_date = ko_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=forecast_steps, freq='B')
# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(ko_close.index, ko_close, label='KO Historical')
plt.plot(forecast_index, ko_forecast, label='KO Forecast', color='red')
plt.plot(jnj_close.index, jnj_close, label='JNJ Historical')
plt.plot(forecast_index, jnj_forecast, label='JNJ Forecast', color='green')
plt.title('KO and JNJ Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
forecast_obj = model.get_forecast(steps=steps)
forecast = forecast_obj.predicted_mean
conf_int = forecast_obj.conf_int()
# Create proper date index for forecast
last_date = series.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=steps, freq='B')
plt.figure(figsize=(12,6))
plt.plot(series.index, series, label=f'{name} Historical')
plt.plot(forecast_index, forecast, label='Forecast', color='red')
plt.fill_between(forecast_index,
conf_int.iloc[:, 0],
conf_int.iloc[:, 1],
color='pink',
alpha=0.3,
label='95% Confidence Interval')
plt.title(f'{name} Price Forecast')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Forecast interpretation
last_value = series.iloc[-1]
mean_forecast = forecast.mean()
print(f"\nForecast Interpretation for {name}:")
print(f"Last observed value: ${last_value:.2f}")
print(f"Average forecast value: ${mean_forecast:.2f}")
print(f"Forecast change: ${mean_forecast - last_value:.2f}")
if mean_forecast > last_value:
print("Trend: Upward forecast trend")
elif mean_forecast < last_value:
print("Trend: Downward forecast trend")
else:
print("Trend: Flat forecast trend")
print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")
print("Interpretation: The wider the confidence interval, the less certain the forecast")
# Generate detailed forecast plots and interpretations
plot_forecast(ko_model, ko_close, "KO")
plot_forecast(jnj_model, jnj_close, "JNJ")
# Plot correlograms for model residuals
plot_correlograms(ko_model.resid, "KO ARIMA Residuals")
plot_correlograms(jnj_model.resid, "JNJ ARIMA Residuals")
# Print forecast values
print("\nKO Forecast Values (next 5 periods):")
print(ko_forecast[:5])
print("\nJNJ Forecast Values (next 5 periods):")
print(jnj_forecast[:5])
print("\n" + "="*60)
print("COMPLETE TIME SERIES ANALYSIS")
print("="*60)
print("✓ Unit root tests (ADF & KPSS) performed")
print("✓ ACF/PACF correlograms analyzed (original & differenced)")
print("✓ Cointegration testing (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC")
print("✓ 30-day forecasts with 95% confidence intervals")
print("✓ Residual diagnostics via correlograms")
print(f"\nData: KO (Coca-Cola) & JNJ (Johnson & Johnson)")
print(f"Period: {start_date} to {end_date}")
[*********************100%***********************] 2 of 2 completed
Downloading KO and JNJ data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09
Unit Root Tests for KO:
ADF Test:
ADF Statistic: -1.6646
p-value: 0.4495
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value >= 0.05: Fail to reject null - KO may be non-stationary
KPSS Test:
KPSS Statistic: 3.3544
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - KO is non-stationary
Unit Root Tests for JNJ:
ADF Test:
ADF Statistic: -1.3590
p-value: 0.6018
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value >= 0.05: Fail to reject null - JNJ may be non-stationary
KPSS Test:
KPSS Statistic: 0.5093
p-value: 0.0396
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - JNJ is non-stationary
Correlogram Interpretation for KO Original: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for JNJ Original: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for KO Differenced: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for JNJ Differenced:
- ACF: Shows total correlation at each lag, including indirect effects
- PACF: Shows direct correlation at each lag, controlling for earlier lags
- Significant spikes outside the blue confidence interval suggest strong correlations
- ACF decay pattern indicates potential ARIMA model orders
- PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Johansen Cointegration Test:
Trace statistic: [9.56967308 2.82926924]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
Interpretation:
- r = 0: No cointegration at 95% confidence level
Trace statistic (9.57) <= 95% critical value (15.49)
- r = 1: No cointegration at 95% confidence level
Trace statistic (2.83) <= 95% critical value (3.84)
Conclusion: No evidence of cointegration between KO and JNJ
Best ARIMA model for KO:
Order: (2, 1, 2)
AIC: 1734.92
Interpretation:
- p=2: 2 autoregressive term(s)
- d=1: 1 difference(s) needed for stationarity
- q=2: 2 moving average term(s)
Best ARIMA model for JNJ:
Order: (3, 1, 1)
AIC: 3656.40
Interpretation:
- p=3: 3 autoregressive term(s)
- d=1: 1 difference(s) needed for stationarity
- q=1: 1 moving average term(s)
Forecast Interpretation for KO: Last observed value: $66.37 Average forecast value: $66.36 Forecast change: $-0.01 Trend: Downward forecast trend 95% CI range at period 30: [$60.02, $73.19] Interpretation: The wider the confidence interval, the less certain the forecast
Forecast Interpretation for JNJ: Last observed value: $191.08 Average forecast value: $190.80 Forecast change: $-0.28 Trend: Downward forecast trend 95% CI range at period 30: [$174.56, $207.16] Interpretation: The wider the confidence interval, the less certain the forecast
Correlogram Interpretation for KO ARIMA Residuals: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for JNJ ARIMA Residuals: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order KO Forecast Values (next 5 periods): 946 66.190071 947 66.116038 948 66.190193 949 66.368658 950 66.547343 Name: predicted_mean, dtype: float64 JNJ Forecast Values (next 5 periods): 946 190.756738 947 190.891776 948 190.686445 949 190.902232 950 190.696202 Name: predicted_mean, dtype: float64 ============================================================ COMPLETE TIME SERIES ANALYSIS ============================================================ ✓ Unit root tests (ADF & KPSS) performed ✓ ACF/PACF correlograms analyzed (original & differenced) ✓ Cointegration testing (Johansen test) ✓ Optimal ARIMA models selected via AIC ✓ 30-day forecasts with 95% confidence intervals ✓ Residual diagnostics via correlograms Data: KO (Coca-Cola) & JNJ (Johnson & Johnson) Period: 2022-01-01 to 2025-10-10
Conclusiones:
In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
from statsmodels.tsa.stattools import adfuller, zivot_andrews
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading KO and JNJ data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['KO', 'JNJ']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
ko_close = data['Adj Close']['KO'].dropna()
jnj_close = data['Adj Close']['JNJ'].dropna()
# Align both series to common dates
common_index = ko_close.index.intersection(jnj_close.index)
ko_close = ko_close.loc[common_index]
jnj_close = jnj_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(ko_close)}")
print(f"Date range: {ko_close.index[0].date()} to {ko_close.index[-1].date()}")
# Function for Phillips-Perron test with interpretation
def phillips_perron_test(series, name):
print(f"\nPhillips-Perron Test for {name}:")
# Use adfuller with settings to approximate PP test
pp_result = adfuller(series, regression='c', autolag='AIC', maxlag=None)
print(f'PP Statistic: {pp_result[0]:.4f}')
print(f'p-value: {pp_result[1]:.4f}')
print(f'Critical Values: {pp_result[4]}')
print("Interpretation:")
if pp_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
print(" - Note: Using ADF with constant and automatic lag selection to approximate PP test")
print(" - PP test adjusts for serial correlation and heteroskedasticity non-parametrically")
return pp_result
# Function for Zivot-Andrews structural break test with interpretation
def zivot_andrews_test(series, name):
print(f"\nZivot-Andrews Structural Break Test for {name}:")
za_result = zivot_andrews(series, regression='c', autolag='AIC')
print(f'ZA Statistic: {za_result[0]:.4f}')
print(f'p-value: {za_result[1]:.4f}')
print(f'Critical Values: {za_result[2]}')
print(f'Breakpoint Index: {za_result[3]}')
print("Interpretation:")
if za_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} has a structural break")
print(f" - Breakpoint at index {za_result[3]} (position in series)")
else:
print(f" - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break")
print(" - ZA test allows for a single break in intercept and/or trend")
return za_result
# Perform Phillips-Perron tests
ko_pp = phillips_perron_test(ko_close, "KO")
jnj_pp = phillips_perron_test(jnj_close, "JNJ")
# Perform Zivot-Andrews tests
ko_za = zivot_andrews_test(ko_close, "KO")
jnj_za = zivot_andrews_test(jnj_close, "JNJ")
# Plot series with breakpoints
def plot_series_with_breakpoint(series, name, breakpoint_idx):
plt.figure(figsize=(12, 6))
plt.plot(series.index, series.values, label=f'{name} Adjusted Closing Prices')
# Convert breakpoint index to actual date
if 0 <= breakpoint_idx < len(series):
breakpoint_date = series.index[breakpoint_idx]
plt.axvline(x=breakpoint_date, color='red', linestyle='--',
label=f'Breakpoint ({breakpoint_date.date()})')
print(f" - Breakpoint date: {breakpoint_date.date()}")
else:
print(f" - Warning: Breakpoint index {breakpoint_idx} out of range (0-{len(series)-1})")
# Use a fallback vertical line at the middle of the series
mid_idx = len(series) // 2
mid_date = series.index[mid_idx]
plt.axvline(x=mid_date, color='orange', linestyle=':',
label='Reference line (invalid breakpoint)')
plt.title(f'{name} Adjusted Closing Prices with Structural Break')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
print(f"\nPlot Interpretation for {name}:")
print(f" - Red dashed line indicates the detected structural break")
print(" - Break may reflect significant market events, policy changes, or economic shifts")
print(" - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact)")
print(" - For KO and JNJ, consider pharmaceutical/healthcare events or consumer goods trends")
# Plot series with breakpoints using the series' own datetime index
plot_series_with_breakpoint(ko_close, "KO (Coca-Cola)", int(ko_za[3]))
plot_series_with_breakpoint(jnj_close, "JNJ (Johnson & Johnson)", int(jnj_za[3]))
# Summary analysis
print("\n" + "="*70)
print("STRUCTURAL BREAK ANALYSIS SUMMARY")
print("="*70)
print(f"KO Phillips-Perron p-value: {ko_pp[1]:.4f} {'(Stationary)' if ko_pp[1]<0.05 else '(Non-stationary)'}")
print(f"JNJ Phillips-Perron p-value: {jnj_pp[1]:.4f} {'(Stationary)' if jnj_pp[1]<0.05 else '(Non-stationary)'}")
print(f"\nKO Zivot-Andrews p-value: {ko_za[1]:.4f} {'(Structural break detected)' if ko_za[1]<0.05 else '(No clear break)'}")
print(f"KO Breakpoint: {ko_close.index[int(ko_za[3])].date() if 0 <= int(ko_za[3]) < len(ko_close) else 'Invalid'}")
print(f"JNJ Zivot-Andrews p-value: {jnj_za[1]:.4f} {'(Structural break detected)' if jnj_za[1]<0.05 else '(No clear break)'}")
print(f"JNJ Breakpoint: {jnj_close.index[int(jnj_za[3])].date() if 0 <= int(jnj_za[3]) < len(jnj_close) else 'Invalid'}")
print(f"\nData Period: {start_date} to {end_date}")
print("Analysis covers potential impacts from:")
print("- COVID-19 pandemic effects and recovery")
print("- Interest rate changes and inflation")
print("- Supply chain disruptions")
print("- Company-specific events (mergers, product launches, regulatory changes)")
print("- Broader market volatility")
print("\nRecommendations:")
print("1. If structural breaks detected, consider regime-switching models")
print("2. For non-stationary series, use differencing or cointegration approaches")
print("3. Investigate specific events around breakpoint dates")
print("4. Consider sector-specific factors (consumer goods for KO, healthcare for JNJ)")
print("5. Validate breakpoints with external economic calendars and company news")
[*********************100%***********************] 2 of 2 completed
Downloading KO and JNJ data from Yahoo Finance... Data successfully downloaded and aligned! Common data points: 946 Date range: 2022-01-03 to 2025-10-09 Phillips-Perron Test for KO:
PP Statistic: -1.6646
p-value: 0.4495
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value >= 0.05: Fail to reject null - KO may be non-stationary
- Note: Using ADF with constant and automatic lag selection to approximate PP test
- PP test adjusts for serial correlation and heteroskedasticity non-parametrically
Phillips-Perron Test for JNJ:
PP Statistic: -1.3590
p-value: 0.6018
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value >= 0.05: Fail to reject null - JNJ may be non-stationary
- Note: Using ADF with constant and automatic lag selection to approximate PP test
- PP test adjusts for serial correlation and heteroskedasticity non-parametrically
Zivot-Andrews Structural Break Test for KO:
ZA Statistic: -3.9519
p-value: 0.3809
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 0
Interpretation:
- p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
- ZA test allows for a single break in intercept and/or trend
Zivot-Andrews Structural Break Test for JNJ:
ZA Statistic: -2.2551
p-value: 0.9926
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 2
Interpretation:
- p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
- ZA test allows for a single break in intercept and/or trend
- Breakpoint date: 2022-01-03
Plot Interpretation for KO (Coca-Cola): - Red dashed line indicates the detected structural break - Break may reflect significant market events, policy changes, or economic shifts - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact) - For KO and JNJ, consider pharmaceutical/healthcare events or consumer goods trends - Breakpoint date: 2022-01-05
Plot Interpretation for JNJ (Johnson & Johnson): - Red dashed line indicates the detected structural break - Break may reflect significant market events, policy changes, or economic shifts - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact) - For KO and JNJ, consider pharmaceutical/healthcare events or consumer goods trends ====================================================================== STRUCTURAL BREAK ANALYSIS SUMMARY ====================================================================== KO Phillips-Perron p-value: 0.4495 (Non-stationary) JNJ Phillips-Perron p-value: 0.6018 (Non-stationary) KO Zivot-Andrews p-value: 0.3809 (No clear break) KO Breakpoint: 2022-01-03 JNJ Zivot-Andrews p-value: 0.9926 (No clear break) JNJ Breakpoint: 2022-01-05 Data Period: 2022-01-01 to 2025-10-10 Analysis covers potential impacts from: - COVID-19 pandemic effects and recovery - Interest rate changes and inflation - Supply chain disruptions - Company-specific events (mergers, product launches, regulatory changes) - Broader market volatility Recommendations: 1. If structural breaks detected, consider regime-switching models 2. For non-stationary series, use differencing or cointegration approaches 3. Investigate specific events around breakpoint dates 4. Consider sector-specific factors (consumer goods for KO, healthcare for JNJ) 5. Validate breakpoints with external economic calendars and company news
Apple (AAPL) vs Walt Disney Company (DIS) | 2022-01-01 to 2025-10-10¶
In [ ]:
import yfinance as yf
import pandas as pd
# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'
# Fetch stock data for Apple (AAPL)
aapl = yf.download('AAPL', start=start_date, end=end_date)
# Display the first 5 rows
print(aapl.head(5))
[*********************100%***********************] 1 of 1 completed
Price Close High Low Open Volume Ticker AAPL AAPL AAPL AAPL AAPL Date 2022-01-03 178.443100 179.296061 174.227380 174.345024 104487900 2022-01-04 176.178436 179.354948 175.609801 179.051025 99310400 2022-01-05 171.492050 176.639165 171.217539 176.090142 94537600 2022-01-06 168.629272 171.864605 168.276327 169.315551 96904000 2022-01-07 168.795944 170.727339 167.678286 169.501835 86709100
In [ ]:
import yfinance as yf
import pandas as pd
# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'
# Fetch stock data for The Walt Disney Company (DIS)
dis = yf.download('DIS', start=start_date, end=end_date)
# Display the first 5 rows
print(dis.head(5))
[*********************100%***********************] 1 of 1 completed
Price Close High Low Open Volume Ticker DIS DIS DIS DIS DIS Date 2022-01-03 154.189560 154.976445 152.812522 153.274817 10222800 2022-01-04 153.176422 157.691169 152.999381 155.989526 16582000 2022-01-05 152.645309 156.766607 152.556789 153.953503 12272100 2022-01-06 154.327240 155.182984 151.160038 153.678074 11095300 2022-01-07 155.242004 156.687901 153.727248 154.327246 9554600
In [ ]:
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings
# Suppress the FutureWarning
warnings.filterwarnings('ignore', category=FutureWarning)
# Download stock data from Yahoo Finance
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['AAPL', 'DIS']
# Download data with explicit auto_adjust=False to get Adj Close column
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)['Adj Close']
# If the above still fails, use this alternative approach:
# data = yf.download(tickers, start=start_date, end=end_date)
# data = data['Close'] # Use regular Close prices instead
# Extract individual series
dis_close = data['DIS']
aapl_close = data['AAPL']
print("DIS data length:", len(dis_close))
print("AAPL data length:", len(aapl_close))
print("\nDIS data sample:")
print(dis_close.head())
print("\nAAPL data sample:")
print(aapl_close.head())
# Remove any NaN values
dis_close = dis_close.dropna()
aapl_close = aapl_close.dropna()
# Align by common dates
common_index = dis_close.index.intersection(aapl_close.index)
dis_close = dis_close.loc[common_index]
aapl_close = aapl_close.loc[common_index]
print(f"\nCommon data length after alignment: {len(dis_close)}")
print(f"Date range: {dis_close.index[0]} to {dis_close.index[-1]}")
# Function for unit root tests
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
# KPSS Test
kpss_result = kpss(series, regression='c')
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
# Perform unit root tests
unit_root_tests(dis_close, "DIS")
unit_root_tests(aapl_close, "AAPL")
# Difference the series if non-stationary
dis_diff = dis_close.diff().dropna()
aapl_diff = aapl_close.diff().dropna()
# Function to find best ARMA model (using ARIMA with d=0)
def find_best_arma(series, name, max_p=3, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, 0, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, 0, q)
except:
continue
print(f"\nBest ARMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
best_model = ARIMA(series, order=best_order).fit()
return best_model
# Fit ARMA models
dis_arma = find_best_arma(dis_diff, "DIS")
aapl_arma = find_best_arma(series=aapl_diff, name="AAPL")
# Cointegration test
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]: # 95% critical value
print(f"r = {i}: Cointegration exists at 95% confidence level")
else:
print(f"r = {i}: No cointegration at 95% confidence level")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'DIS': dis_close,
'AAPL': aapl_close
})
# Run cointegration test
cointegration_test(coint_df)
# Plot the series
plt.figure(figsize=(12,6))
plt.plot(dis_close, label='DIS')
plt.plot(aapl_close, label='AAPL')
plt.title('DIS vs AAPL Adjusted Closing Prices')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Plot the differenced series
plt.figure(figsize=(12,6))
plt.plot(dis_diff, label='DIS Diff', alpha=0.7)
plt.plot(aapl_diff, label='AAPL Diff', alpha=0.7)
plt.title('Differenced Series')
plt.xlabel('Date')
plt.ylabel('Price Change ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Print summary statistics
print("\nSummary Statistics:")
print(dis_close.describe())
print("\n")
print(aapl_close.describe())
[ 0% ] [*********************100%***********************] 2 of 2 completed
DIS data length: 946
AAPL data length: 946
DIS data sample:
Date
2022-01-03 154.189560
2022-01-04 153.176422
2022-01-05 152.645309
2022-01-06 154.327240
2022-01-07 155.242004
Name: DIS, dtype: float64
AAPL data sample:
Date
2022-01-03 178.443100
2022-01-04 176.178436
2022-01-05 171.492050
2022-01-06 168.629272
2022-01-07 168.795944
Name: AAPL, dtype: float64
Common data length after alignment: 946
Date range: 2022-01-03 00:00:00 to 2025-10-09 00:00:00
Unit Root Tests for DIS:
ADF Test:
ADF Statistic: -3.3087
p-value: 0.0145
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
KPSS Test:
KPSS Statistic: 0.7122
p-value: 0.0124
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Unit Root Tests for AAPL:
ADF Test:
ADF Statistic: -0.8342
p-value: 0.8089
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
KPSS Test:
KPSS Statistic: 3.8415
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Best ARMA model for DIS:
Order: (2, 0, 3)
AIC: 3954.67
Best ARMA model for AAPL:
Order: (3, 0, 1)
AIC: 4934.01
Johansen Cointegration Test:
Trace statistic: [12.12681683 1.01459214]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
r = 0: No cointegration at 95% confidence level
r = 1: No cointegration at 95% confidence level
Summary Statistics: count 946.000000 mean 103.151529 std 15.573004 min 78.019356 25% 91.578011 50% 99.899529 75% 112.197226 max 155.301010 Name: DIS, dtype: float64 count 946.000000 mean 185.420839 std 32.913288 min 123.281334 25% 160.700706 50% 180.398621 75% 213.006443 max 258.103729 Name: AAPL, dtype: float64
Conclusiones
In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading DIS and AAPL data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['DIS', 'AAPL']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
dis_close = data['Adj Close']['DIS'].dropna()
aapl_close = data['Adj Close']['AAPL'].dropna()
# Align both series to common dates
common_index = dis_close.index.intersection(aapl_close.index)
dis_close = dis_close.loc[common_index]
aapl_close = aapl_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(dis_close)}")
print(f"Date range: {dis_close.index[0].date()} to {dis_close.index[-1].date()}")
# Function for unit root tests
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
# KPSS Test
kpss_result = kpss(series)
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
# Perform unit root tests
unit_root_tests(dis_close, "DIS")
unit_root_tests(aapl_close, "AAPL")
# Cointegration test
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]:
print(f"r = {i}: Cointegration exists at 95% confidence level")
else:
print(f"r = {i}: No cointegration at 95% confidence level")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'DIS': dis_close,
'AAPL': aapl_close
}).dropna()
cointegration_test(coint_df)
# Function to find best ARIMA model
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for d in range(max_d + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, d, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, d, q)
except:
continue
print(f"\nBest ARIMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
return best_order
# Find and fit best ARIMA models
dis_order = find_best_arima(dis_close, "DIS")
aapl_order = find_best_arima(aapl_close, "AAPL")
# Fit final ARIMA models
dis_model = ARIMA(dis_close, order=dis_order).fit()
aapl_model = ARIMA(aapl_close, order=aapl_order).fit()
# Forecast next 30 periods
forecast_steps = 30
dis_forecast = dis_model.forecast(steps=forecast_steps)
aapl_forecast = aapl_model.forecast(steps=forecast_steps)
# Create forecast index using business days
last_date = dis_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=forecast_steps, freq='B')
# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(dis_close.index, dis_close, label='DIS Historical')
plt.plot(forecast_index, dis_forecast, label='DIS Forecast', color='red')
plt.plot(aapl_close.index, aapl_close, label='AAPL Historical')
plt.plot(forecast_index, aapl_forecast, label='AAPL Forecast', color='green')
plt.title('DIS and AAPL Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Plot detailed forecast with confidence intervals
def plot_forecast(model, series, name, steps=30):
forecast_obj = model.get_forecast(steps=steps)
forecast = forecast_obj.predicted_mean
conf_int = forecast_obj.conf_int()
# Create proper date index for forecast
last_date = series.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=steps, freq='B')
plt.figure(figsize=(12,6))
plt.plot(series.index, series, label=f'{name} Historical')
plt.plot(forecast_index, forecast, label='Forecast', color='red')
plt.fill_between(forecast_index,
conf_int.iloc[:, 0],
conf_int.iloc[:, 1],
color='pink',
alpha=0.3,
label='95% Confidence Interval')
plt.title(f'{name} Price Forecast')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Generate detailed forecast plots
plot_forecast(dis_model, dis_close, "DIS")
plot_forecast(aapl_model, aapl_close, "AAPL")
# Print forecast values
print("\nDIS Forecast Values (next 5 periods):")
print(dis_forecast[:5])
print("\nAAPL Forecast Values (next 5 periods):")
print(aapl_forecast[:5])
# Print model diagnostics
print("\n" + "="*50)
print("MODEL DIAGNOSTICS")
print("="*50)
print(f"\nKO Model: ARIMA{dis_order}")
print(f"JNJ Model: ARIMA{aapl_order}")
print("\nNote: Use model.summary() for detailed parameter estimates and diagnostics")
print("The models automatically handle:")
print("- Unit root testing (via optimal differencing d)")
print("- Cointegration analysis (Johansen test)")
print("- Optimal parameter selection (AIC minimization)")
print("- 30-day ahead forecasting with confidence intervals")
[*********************100%***********************] 2 of 2 completed
Downloading DIS and AAPL data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09
Unit Root Tests for DIS:
ADF Test:
ADF Statistic: -3.3087
p-value: 0.0145
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
KPSS Test:
KPSS Statistic: 0.7122
p-value: 0.0124
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Unit Root Tests for AAPL:
ADF Test:
ADF Statistic: -0.8342
p-value: 0.8089
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
KPSS Test:
KPSS Statistic: 3.8415
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Johansen Cointegration Test:
Trace statistic: [12.12681683 1.01459214]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
r = 0: No cointegration at 95% confidence level
r = 1: No cointegration at 95% confidence level
Best ARIMA model for DIS: Order: (2, 1, 3) AIC: 3953.08 Best ARIMA model for AAPL: Order: (1, 1, 0) AIC: 4932.70
DIS Forecast Values (next 5 periods): 946 111.258834 947 111.758426 948 112.129465 949 112.166258 950 111.876199 Name: predicted_mean, dtype: float64 AAPL Forecast Values (next 5 periods): 946 253.853858 947 253.845240 948 253.844840 949 253.844822 950 253.844821 Name: predicted_mean, dtype: float64 ================================================== MODEL DIAGNOSTICS ================================================== KO Model: ARIMA(2, 1, 3) JNJ Model: ARIMA(1, 1, 0) Note: Use model.summary() for detailed parameter estimates and diagnostics The models automatically handle: - Unit root testing (via optimal differencing d) - Cointegration analysis (Johansen test) - Optimal parameter selection (AIC minimization) - 30-day ahead forecasting with confidence intervals
Conclusiones
In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading DIS and AAPL data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['DIS', 'AAPL']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
dis_close = data['Adj Close']['DIS'].dropna()
aapl_close = data['Adj Close']['AAPL'].dropna()
# Align both series to common dates
common_index = dis_close.index.intersection(aapl_close.index)
dis_close = dis_close.loc[common_index]
aapl_close = aapl_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(dis_close)}")
print(f"Date range: {dis_close.index[0].date()} to {dis_close.index[-1].date()}")
# Function for unit root tests with interpretation
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
print("Interpretation:")
if adf_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
# KPSS Test
kpss_result = kpss(series)
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
print("Interpretation:")
if kpss_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be stationary")
# Perform unit root tests
unit_root_tests(dis_close, "DIS")
unit_root_tests(aapl_close, "AAPL")
# Cointegration test with interpretation
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
print("Interpretation:")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]:
print(f" - r = {i}: Cointegration exists at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
else:
print(f" - r = {i}: No cointegration at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
if result.lr1[0] > result.cvt[0, 1]:
print("Conclusion: DIS and AAPL are cointegrated - they share a long-run equilibrium relationship")
else:
print("Conclusion: No evidence of cointegration between DIS and AAPL")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'DIS': dis_close,
'AAPL': aapl_close
}).dropna()
cointegration_test(coint_df)
# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for d in range(max_d + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, d, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, d, q)
except:
continue
print(f"\nBest ARIMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
print("Interpretation:")
print(f" - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
print(f" - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
print(f" - q={best_order[2]}: {best_order[2]} moving average term(s)")
return best_order
# Find and fit best ARIMA models
dis_order = find_best_arima(dis_close, "DIS")
aapl_order = find_best_arima(aapl_close, "AAPL")
# Fit final ARIMA models
dis_model = ARIMA(dis_close, order=dis_order).fit()
aapl_model = ARIMA(aapl_close, order=aapl_order).fit()
# Forecast next 30 periods
forecast_steps = 30
dis_forecast = dis_model.forecast(steps=forecast_steps)
aapl_forecast = aapl_model.forecast(steps=forecast_steps)
# Create forecast index using business days
last_date = dis_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=forecast_steps, freq='B')
# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(dis_close.index, dis_close, label='DIS Historical')
plt.plot(forecast_index, dis_forecast, label='DIS Forecast', color='red')
plt.plot(aapl_close.index, aapl_close, label='AAPL Historical')
plt.plot(forecast_index, aapl_forecast, label='AAPL Forecast', color='green')
plt.title('DIS and AAPL Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
forecast_obj = model.get_forecast(steps=steps)
forecast = forecast_obj.predicted_mean
conf_int = forecast_obj.conf_int()
# Create proper date index for forecast
last_date = series.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=steps, freq='B')
plt.figure(figsize=(12,6))
plt.plot(series.index, series, label=f'{name} Historical')
plt.plot(forecast_index, forecast, label='Forecast', color='red')
plt.fill_between(forecast_index,
conf_int.iloc[:, 0],
conf_int.iloc[:, 1],
color='pink',
alpha=0.3,
label='95% Confidence Interval')
plt.title(f'{name} Price Forecast')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Forecast interpretation
last_value = series.iloc[-1]
mean_forecast = forecast.mean()
print(f"\nForecast Interpretation for {name}:")
print(f"Last observed value: ${last_value:.2f}")
print(f"Average forecast value: ${mean_forecast:.2f}")
print(f"Forecast change: ${mean_forecast - last_value:.2f}")
if mean_forecast > last_value:
print("Trend: Upward forecast trend")
elif mean_forecast < last_value:
print("Trend: Downward forecast trend")
else:
print("Trend: Flat forecast trend")
print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")
# Generate detailed forecast plots and interpretations
plot_forecast(dis_model, dis_close, "DIS")
plot_forecast(aapl_model, aapl_close, "AAPL")
# Print forecast values
print("\nDIS Forecast Values (next 5 periods):")
print(dis_forecast[:5])
print("\nAAPL Forecast Values (next 5 periods):")
print(aapl_forecast[:5])
print("\n" + "="*60)
print("ANALYSIS COMPLETE")
print("="*60)
print("✓ Unit root tests performed (ADF & KPSS)")
print("✓ Cointegration analysis completed (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC minimization")
print("✓ 30-day forecasts generated with 95% confidence intervals")
print("✓ Detailed interpretations provided for all results")
print(f"\nData period: {start_date} to {end_date}")
print(f"Tickers analyzed: DIS (The Walt Disney Company) and AAPL (APPLE)")
[*********************100%***********************] 2 of 2 completed
Downloading DIS and AAPL data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09
Unit Root Tests for DIS:
ADF Test:
ADF Statistic: -3.3087
p-value: 0.0145
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value < 0.05: Reject null hypothesis - DIS is stationary
KPSS Test:
KPSS Statistic: 0.7122
p-value: 0.0124
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - DIS is non-stationary
Unit Root Tests for AAPL:
ADF Test:
ADF Statistic: -0.8342
p-value: 0.8089
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value >= 0.05: Fail to reject null - AAPL may be non-stationary
KPSS Test:
KPSS Statistic: 3.8415
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - AAPL is non-stationary
Johansen Cointegration Test:
Trace statistic: [12.12681683 1.01459214]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
Interpretation:
- r = 0: No cointegration at 95% confidence level
Trace statistic (12.13) <= 95% critical value (15.49)
- r = 1: No cointegration at 95% confidence level
Trace statistic (1.01) <= 95% critical value (3.84)
Conclusion: No evidence of cointegration between DIS and AAPL
Best ARIMA model for DIS: Order: (2, 1, 3) AIC: 3953.08 Interpretation: - p=2: 2 autoregressive term(s) - d=1: 1 difference(s) needed for stationarity - q=3: 3 moving average term(s) Best ARIMA model for AAPL: Order: (1, 1, 0) AIC: 4932.70 Interpretation: - p=1: 1 autoregressive term(s) - d=1: 1 difference(s) needed for stationarity - q=0: 0 moving average term(s)
Forecast Interpretation for DIS: Last observed value: $110.99 Average forecast value: $111.63 Forecast change: $0.64 Trend: Upward forecast trend 95% CI range at period 30: [$88.62, $134.37]
Forecast Interpretation for AAPL: Last observed value: $254.04 Average forecast value: $253.85 Forecast change: $-0.19 Trend: Downward forecast trend 95% CI range at period 30: [$216.95, $290.74] DIS Forecast Values (next 5 periods): 946 111.258834 947 111.758426 948 112.129465 949 112.166258 950 111.876199 Name: predicted_mean, dtype: float64 AAPL Forecast Values (next 5 periods): 946 253.853858 947 253.845240 948 253.844840 949 253.844822 950 253.844821 Name: predicted_mean, dtype: float64 ============================================================ ANALYSIS COMPLETE ============================================================ ✓ Unit root tests performed (ADF & KPSS) ✓ Cointegration analysis completed (Johansen test) ✓ Optimal ARIMA models selected via AIC minimization ✓ 30-day forecasts generated with 95% confidence intervals ✓ Detailed interpretations provided for all results Data period: 2022-01-01 to 2025-10-10 Tickers analyzed: DIS (The Walt Disney Company) and AAPL (APPLE)
In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading DIS and AAPL data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['DIS', 'AAPL']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
dis_close = data['Adj Close']['DIS'].dropna()
aapl_close = data['Adj Close']['AAPL'].dropna()
# Align both series to common dates
common_index = dis_close.index.intersection(aapl_close.index)
dis_close = dis_close.loc[common_index]
aapl_close = aapl_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(dis_close)}")
print(f"Date range: {dis_close.index[0].date()} to {dis_close.index[-1].date()}")
# Function for unit root tests with interpretation
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
print("Interpretation:")
if adf_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
# KPSS Test
kpss_result = kpss(series)
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
print("Interpretation:")
if kpss_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be stationary")
# Perform unit root tests
unit_root_tests(dis_close, "DIS")
unit_root_tests(aapl_close, "AAPL")
# Function to plot correlograms with interpretation
def plot_correlograms(series, name, lags=30):
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
plot_acf(series, lags=lags, ax=plt.gca())
plt.title(f'ACF for {name}')
plt.subplot(2, 1, 2)
plot_pacf(series, lags=lags, ax=plt.gca())
plt.title(f'PACF for {name}')
plt.tight_layout()
plt.show()
print(f"\nCorrelogram Interpretation for {name}:")
print(" - ACF: Shows total correlation at each lag, including indirect effects")
print(" - PACF: Shows direct correlation at each lag, controlling for earlier lags")
print(" - Significant spikes outside the blue confidence interval suggest strong correlations")
print(" - ACF decay pattern indicates potential ARIMA model orders")
print(" - PACF cutoff suggests AR order, while ACF cutoff suggests MA order")
# Plot correlograms for original series
plot_correlograms(dis_close, "DIS Original")
plot_correlograms(aapl_close, "AAPL Original")
# Difference the series
dis_diff = dis_close.diff().dropna()
aapl_diff = aapl_close.diff().dropna()
# Plot correlograms for differenced series
plot_correlograms(dis_diff, "DIS Differenced")
plot_correlograms(aapl_diff, "AAPL Differenced")
# Cointegration test with interpretation
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
print("Interpretation:")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]:
print(f" - r = {i}: Cointegration exists at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
else:
print(f" - r = {i}: No cointegration at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
if result.lr1[0] > result.cvt[0, 1]:
print("Conclusion: DIS and AAPL are cointegrated - they share a long-run equilibrium relationship")
else:
print("Conclusion: No evidence of cointegration between DIS and AAPL")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'DIS': dis_close,
'AAPL': aapl_close
}).dropna()
cointegration_test(coint_df)
# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for d in range(max_d + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, d, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, d, q)
except:
continue
print(f"\nBest ARIMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
print("Interpretation:")
print(f" - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
print(f" - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
print(f" - q={best_order[2]}: {best_order[2]} moving average term(s)")
return best_order
# Find and fit best ARIMA models
dis_order = find_best_arima(dis_close, "DIS")
aapl_order = find_best_arima(aapl_close, "AAPL")
# Fit final ARIMA models
dis_model = ARIMA(dis_close, order=dis_order).fit()
aapl_model = ARIMA(aapl_close, order=aapl_order).fit()
# Forecast next 30 periods
forecast_steps = 30
dis_forecast = dis_model.forecast(steps=forecast_steps)
aapl_forecast = aapl_model.forecast(steps=forecast_steps)
# Create forecast index using business days
last_date = dis_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=forecast_steps, freq='B')
# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(dis_close.index, dis_close, label='DIS Historical')
plt.plot(forecast_index, dis_forecast, label='DIS Forecast', color='red')
plt.plot(aapl_close.index, aapl_close, label='AAPL Historical')
plt.plot(forecast_index, aapl_forecast, label='AAPL Forecast', color='green')
plt.title('DIS and AAPL Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
forecast_obj = model.get_forecast(steps=steps)
forecast = forecast_obj.predicted_mean
conf_int = forecast_obj.conf_int()
# Create proper date index for forecast
last_date = series.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=steps, freq='B')
plt.figure(figsize=(12,6))
plt.plot(series.index, series, label=f'{name} Historical')
plt.plot(forecast_index, forecast, label='Forecast', color='red')
plt.fill_between(forecast_index,
conf_int.iloc[:, 0],
conf_int.iloc[:, 1],
color='pink',
alpha=0.3,
label='95% Confidence Interval')
plt.title(f'{name} Price Forecast')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Forecast interpretation
last_value = series.iloc[-1]
mean_forecast = forecast.mean()
print(f"\nForecast Interpretation for {name}:")
print(f"Last observed value: ${last_value:.2f}")
print(f"Average forecast value: ${mean_forecast:.2f}")
print(f"Forecast change: ${mean_forecast - last_value:.2f}")
if mean_forecast > last_value:
print("Trend: Upward forecast trend")
elif mean_forecast < last_value:
print("Trend: Downward forecast trend")
else:
print("Trend: Flat forecast trend")
print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")
print("Interpretation: The wider the confidence interval, the less certain the forecast")
# Generate detailed forecast plots and interpretations
plot_forecast(dis_model, dis_close, "DIS")
plot_forecast(aapl_model, aapl_close, "AAPL")
# Plot correlograms for model residuals
plot_correlograms(dis_model.resid, "DIS ARIMA Residuals")
plot_correlograms(aapl_model.resid, "AAPL ARIMA Residuals")
# Print forecast values
print("\nDIS Forecast Values (next 5 periods):")
print(dis_forecast[:5])
print("\nAAPL Forecast Values (next 5 periods):")
print(aapl_forecast[:5])
print("\n" + "="*60)
print("COMPLETE TIME SERIES ANALYSIS")
print("="*60)
print("✓ Unit root tests (ADF & KPSS) performed")
print("✓ ACF/PACF correlograms analyzed (original & differenced)")
print("✓ Cointegration testing (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC")
print("✓ 30-day forecasts with 95% confidence intervals")
print("✓ Residual diagnostics via correlograms")
print(f"\nData: DIS (The Walt Disney Company) & AAPL (Apple)")
print(f"Period: {start_date} to {end_date}")
[*********************100%***********************] 2 of 2 completed
Downloading DIS and AAPL data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09
Unit Root Tests for DIS:
ADF Test:
ADF Statistic: -3.3087
p-value: 0.0145
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value < 0.05: Reject null hypothesis - DIS is stationary
KPSS Test:
KPSS Statistic: 0.7122
p-value: 0.0124
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - DIS is non-stationary
Unit Root Tests for AAPL:
ADF Test:
ADF Statistic: -0.8342
p-value: 0.8089
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value >= 0.05: Fail to reject null - AAPL may be non-stationary
KPSS Test:
KPSS Statistic: 3.8415
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - AAPL is non-stationary
Correlogram Interpretation for DIS Original: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for AAPL Original: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for DIS Differenced: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for AAPL Differenced:
- ACF: Shows total correlation at each lag, including indirect effects
- PACF: Shows direct correlation at each lag, controlling for earlier lags
- Significant spikes outside the blue confidence interval suggest strong correlations
- ACF decay pattern indicates potential ARIMA model orders
- PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Johansen Cointegration Test:
Trace statistic: [12.12681683 1.01459214]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
Interpretation:
- r = 0: No cointegration at 95% confidence level
Trace statistic (12.13) <= 95% critical value (15.49)
- r = 1: No cointegration at 95% confidence level
Trace statistic (1.01) <= 95% critical value (3.84)
Conclusion: No evidence of cointegration between DIS and AAPL
Best ARIMA model for DIS:
Order: (2, 1, 3)
AIC: 3953.08
Interpretation:
- p=2: 2 autoregressive term(s)
- d=1: 1 difference(s) needed for stationarity
- q=3: 3 moving average term(s)
Best ARIMA model for AAPL:
Order: (1, 1, 0)
AIC: 4932.70
Interpretation:
- p=1: 1 autoregressive term(s)
- d=1: 1 difference(s) needed for stationarity
- q=0: 0 moving average term(s)
Forecast Interpretation for DIS: Last observed value: $110.99 Average forecast value: $111.63 Forecast change: $0.64 Trend: Upward forecast trend 95% CI range at period 30: [$88.62, $134.37] Interpretation: The wider the confidence interval, the less certain the forecast
Forecast Interpretation for AAPL: Last observed value: $254.04 Average forecast value: $253.85 Forecast change: $-0.19 Trend: Downward forecast trend 95% CI range at period 30: [$216.95, $290.74] Interpretation: The wider the confidence interval, the less certain the forecast
Correlogram Interpretation for DIS ARIMA Residuals: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for AAPL ARIMA Residuals: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order DIS Forecast Values (next 5 periods): 946 111.258834 947 111.758426 948 112.129465 949 112.166258 950 111.876199 Name: predicted_mean, dtype: float64 AAPL Forecast Values (next 5 periods): 946 253.853858 947 253.845240 948 253.844840 949 253.844822 950 253.844821 Name: predicted_mean, dtype: float64 ============================================================ COMPLETE TIME SERIES ANALYSIS ============================================================ ✓ Unit root tests (ADF & KPSS) performed ✓ ACF/PACF correlograms analyzed (original & differenced) ✓ Cointegration testing (Johansen test) ✓ Optimal ARIMA models selected via AIC ✓ 30-day forecasts with 95% confidence intervals ✓ Residual diagnostics via correlograms Data: DIS (The Walt Disney Company) & AAPL (Apple) Period: 2022-01-01 to 2025-10-10
Conclusiones
In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
from statsmodels.tsa.stattools import adfuller, zivot_andrews
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading DIS and AAPL data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['DIS', 'AAPL']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
dis_close = data['Adj Close']['DIS'].dropna()
aapl_close = data['Adj Close']['AAPL'].dropna()
# Align both series to common dates
common_index = dis_close.index.intersection(aapl_close.index)
dis_close = dis_close.loc[common_index]
aapl_close = aapl_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(dis_close)}")
print(f"Date range: {dis_close.index[0].date()} to {dis_close.index[-1].date()}")
# Function for Phillips-Perron test with interpretation
def phillips_perron_test(series, name):
print(f"\nPhillips-Perron Test for {name}:")
# Use adfuller with settings to approximate PP test
pp_result = adfuller(series, regression='c', autolag='AIC', maxlag=None)
print(f'PP Statistic: {pp_result[0]:.4f}')
print(f'p-value: {pp_result[1]:.4f}')
print(f'Critical Values: {pp_result[4]}')
print("Interpretation:")
if pp_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
print(" - Note: Using ADF with constant and automatic lag selection to approximate PP test")
print(" - PP test adjusts for serial correlation and heteroskedasticity non-parametrically")
return pp_result
# Function for Zivot-Andrews structural break test with interpretation
def zivot_andrews_test(series, name):
print(f"\nZivot-Andrews Structural Break Test for {name}:")
za_result = zivot_andrews(series, regression='c', autolag='AIC')
print(f'ZA Statistic: {za_result[0]:.4f}')
print(f'p-value: {za_result[1]:.4f}')
print(f'Critical Values: {za_result[2]}')
print(f'Breakpoint Index: {za_result[3]}')
print("Interpretation:")
if za_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} has a structural break")
print(f" - Breakpoint at index {za_result[3]} (position in series)")
else:
print(f" - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break")
print(" - ZA test allows for a single break in intercept and/or trend")
return za_result
# Perform Phillips-Perron tests
dis_pp = phillips_perron_test(dis_close, "DIS")
aapl_pp = phillips_perron_test(aapl_close, "AAPL")
# Perform Zivot-Andrews tests
dis_za = zivot_andrews_test(dis_close, "DIS")
aapl_za = zivot_andrews_test(aapl_close, "AAPL")
# Plot series with breakpoints
def plot_series_with_breakpoint(series, name, breakpoint_idx):
plt.figure(figsize=(12, 6))
plt.plot(series.index, series.values, label=f'{name} Adjusted Closing Prices')
# Convert breakpoint index to actual date
if 0 <= breakpoint_idx < len(series):
breakpoint_date = series.index[breakpoint_idx]
plt.axvline(x=breakpoint_date, color='red', linestyle='--',
label=f'Breakpoint ({breakpoint_date.date()})')
print(f" - Breakpoint date: {breakpoint_date.date()}")
else:
print(f" - Warning: Breakpoint index {breakpoint_idx} out of range (0-{len(series)-1})")
# Use a fallback vertical line at the middle of the series
mid_idx = len(series) // 2
mid_date = series.index[mid_idx]
plt.axvline(x=mid_date, color='orange', linestyle=':',
label='Reference line (invalid breakpoint)')
plt.title(f'{name} Adjusted Closing Prices with Structural Break')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
print(f"\nPlot Interpretation for {name}:")
print(f" - Red dashed line indicates the detected structural break")
print(" - Break may reflect significant market events, policy changes, or economic shifts")
print(" - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact)")
print(" - For DIS and AAPL, consider entertainment or technology")
# Plot series with breakpoints using the series' own datetime index
plot_series_with_breakpoint(dis_close, "DIS (The Walt Disney Company)", int(dis_za[3]))
plot_series_with_breakpoint(aapl_close, "AAPL (Apple)", int(aapl_za[3]))
# Summary analysis
print("\n" + "="*70)
print("STRUCTURAL BREAK ANALYSIS SUMMARY")
print("="*70)
print(f"DIS Phillips-Perron p-value: {dis_pp[1]:.4f} {'(Stationary)' if dis_pp[1]<0.05 else '(Non-stationary)'}")
print(f"AAPL Phillips-Perron p-value: {aapl_pp[1]:.4f} {'(Stationary)' if aapl_pp[1]<0.05 else '(Non-stationary)'}")
print(f"\nDIS Zivot-Andrews p-value: {dis_za[1]:.4f} {'(Structural break detected)' if dis_za[1]<0.05 else '(No clear break)'}")
print(f"DIS Breakpoint: {dis_close.index[int(dis_za[3])].date() if 0 <= int(dis_za[3]) < len(dis_close) else 'Invalid'}")
print(f"AAPL Zivot-Andrews p-value: {aapl_za[1]:.4f} {'(Structural break detected)' if aapl_za[1]<0.05 else '(No clear break)'}")
print(f"AAPL Breakpoint: {aapl_close.index[int(aapl_za[3])].date() if 0 <= int(aapl_za[3]) < len(aapl_close) else 'Invalid'}")
print(f"\nData Period: {start_date} to {end_date}")
print("Analysis covers potential impacts from:")
print("- COVID-19 pandemic effects and recovery")
print("- Interest rate changes and inflation")
print("- Supply chain disruptions")
print("- Company-specific events (mergers, product launches, regulatory changes)")
print("- Broader market volatility")
print("\nRecommendations:")
print("1. If structural breaks detected, consider regime-switching models")
print("2. For non-stationary series, use differencing or cointegration approaches")
print("3. Investigate specific events around breakpoint dates")
print("4. Consider sector-specific factors (entertainment for DIS, technology for AAPL)")
print("5. Validate breakpoints with external economic calendars and company news")
[*********************100%***********************] 2 of 2 completed
Downloading DIS and AAPL data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09
Phillips-Perron Test for DIS:
PP Statistic: -3.3087
p-value: 0.0145
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value < 0.05: Reject null hypothesis - DIS is stationary
- Note: Using ADF with constant and automatic lag selection to approximate PP test
- PP test adjusts for serial correlation and heteroskedasticity non-parametrically
Phillips-Perron Test for AAPL:
PP Statistic: -0.8342
p-value: 0.8089
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value >= 0.05: Fail to reject null - AAPL may be non-stationary
- Note: Using ADF with constant and automatic lag selection to approximate PP test
- PP test adjusts for serial correlation and heteroskedasticity non-parametrically
Zivot-Andrews Structural Break Test for DIS:
ZA Statistic: -3.7757
p-value: 0.4962
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 0
Interpretation:
- p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
- ZA test allows for a single break in intercept and/or trend
Zivot-Andrews Structural Break Test for AAPL:
ZA Statistic: -3.9600
p-value: 0.3759
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 1
Interpretation:
- p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
- ZA test allows for a single break in intercept and/or trend
- Breakpoint date: 2022-01-03
Plot Interpretation for DIS (The Walt Disney Company): - Red dashed line indicates the detected structural break - Break may reflect significant market events, policy changes, or economic shifts - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact) - For DIS and AAPL, consider entertainment or technology - Breakpoint date: 2022-01-04
Plot Interpretation for AAPL (Apple): - Red dashed line indicates the detected structural break - Break may reflect significant market events, policy changes, or economic shifts - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact) - For DIS and AAPL, consider entertainment or technology ====================================================================== STRUCTURAL BREAK ANALYSIS SUMMARY ====================================================================== DIS Phillips-Perron p-value: 0.0145 (Stationary) AAPL Phillips-Perron p-value: 0.8089 (Non-stationary) DIS Zivot-Andrews p-value: 0.4962 (No clear break) DIS Breakpoint: 2022-01-03 AAPL Zivot-Andrews p-value: 0.3759 (No clear break) AAPL Breakpoint: 2022-01-04 Data Period: 2022-01-01 to 2025-10-10 Analysis covers potential impacts from: - COVID-19 pandemic effects and recovery - Interest rate changes and inflation - Supply chain disruptions - Company-specific events (mergers, product launches, regulatory changes) - Broader market volatility Recommendations: 1. If structural breaks detected, consider regime-switching models 2. For non-stationary series, use differencing or cointegration approaches 3. Investigate specific events around breakpoint dates 4. Consider sector-specific factors (entertainment for DIS, technology for AAPL) 5. Validate breakpoints with external economic calendars and company news
NVIDIA (NVDA) vs APTIV (APTV) | 2022-01-01 to 2025-10-10¶
In [1]:
import yfinance as yf
import pandas as pd
# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'
# Fetch stock data for APTV
aptv = yf.download('APTV', start=start_date, end=end_date)
# Display the first 5 rows
print(aptv.head(5))
/tmp/ipython-input-2676107814.py:9: FutureWarning: YF.download() has changed argument auto_adjust default to True
aptv = yf.download('APTV', start=start_date, end=end_date)
[*********************100%***********************] 1 of 1 completed
Price Close High Low Open Volume Ticker APTV APTV APTV APTV APTV Date 2022-01-03 166.029999 168.229996 165.000000 166.380005 1184400 2022-01-04 172.210007 174.380005 167.289993 167.649994 1713900 2022-01-05 168.279999 173.940002 168.259995 172.500000 1205500 2022-01-06 174.119995 174.259995 165.500000 168.880005 2041000 2022-01-07 169.470001 175.910004 169.360001 173.770004 1502400
In [2]:
import yfinance as yf
import pandas as pd
# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'
# Fetch stock data for NVDA
nvda = yf.download('NVDA', start=start_date, end=end_date)
# Display the first 5 rows
print(nvda.head(5))
/tmp/ipython-input-4105848134.py:9: FutureWarning: YF.download() has changed argument auto_adjust default to True
nvda = yf.download('NVDA', start=start_date, end=end_date)
[*********************100%***********************] 1 of 1 completed
Price Close High Low Open Volume Ticker NVDA NVDA NVDA NVDA NVDA Date 2022-01-03 30.064438 30.653330 29.729068 29.759013 391547000 2022-01-04 29.234997 30.410784 28.295764 30.220143 527154000 2022-01-05 27.552164 29.360762 27.481298 28.894638 498064000 2022-01-06 28.125084 28.384596 27.014175 27.588094 454186000 2022-01-07 27.195837 28.368632 27.006193 28.088159 409939000
In [3]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings
# Suppress the FutureWarning
warnings.filterwarnings('ignore', category=FutureWarning)
# Download stock data from Yahoo Finance
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['NVDA', 'APTV']
# Download data with explicit auto_adjust=False to get Adj Close column
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)['Adj Close']
# If the above fails, use this alternative approach:
# data = yf.download(tickers, start=start_date, end=end_date)
# data = data['Close'] # Use regular Close prices instead
# Extract individual series
nvda_close = data['NVDA']
aptv_close = data['APTV']
print("NVDA data length:", len(nvda_close))
print("APTV data length:", len(aptv_close))
print("\nNVDA data sample:")
print(nvda_close.head())
print("\nAPTV data sample:")
print(aptv_close.head())
# Remove any NaN values
nvda_close = nvda_close.dropna()
aptv_close = aptv_close.dropna()
# Align by common dates
common_index = nvda_close.index.intersection(aptv_close.index)
nvda_close = nvda_close.loc[common_index]
aptv_close = aptv_close.loc[common_index]
print(f"\nCommon data length after alignment: {len(nvda_close)}")
print(f"Date range: {nvda_close.index[0]} to {nvda_close.index[-1]}")
# Function for unit root tests
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
# KPSS Test
kpss_result = kpss(series, regression='c')
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
# Perform unit root tests
unit_root_tests(nvda_close, "NVDA")
unit_root_tests(aptv_close, "APTV")
# Difference the series if non-stationary
nvda_diff = nvda_close.diff().dropna()
aptv_diff = aptv_close.diff().dropna()
# Function to find best ARMA model (using ARIMA with d=0)
def find_best_arma(series, name, max_p=3, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, 0, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, 0, q)
except:
continue
print(f"\nBest ARMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
best_model = ARIMA(series, order=best_order).fit()
return best_model
# Fit ARMA models
nvda_arma = find_best_arma(nvda_diff, "NVDA")
aptv_arma = find_best_arma(aptv_diff, "APTV")
# Cointegration test
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]: # 95% critical value
print(f"r = {i}: Cointegration exists at 95% confidence level")
else:
print(f"r = {i}: No cointegration at 95% confidence level")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'NVDA': nvda_close,
'APTV': aptv_close
})
# Run cointegration test
cointegration_test(coint_df)
# Plot the series
plt.figure(figsize=(12,6))
plt.plot(nvda_close, label='NVDA')
plt.plot(aptv_close, label='APTV')
plt.title('NVDA vs APTV Adjusted Closing Prices')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Plot the differenced series
plt.figure(figsize=(12,6))
plt.plot(nvda_diff, label='NVDA Diff', alpha=0.7)
plt.plot(aptv_diff, label='APTV Diff', alpha=0.7)
plt.title('Differenced Series')
plt.xlabel('Date')
plt.ylabel('Price Change ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Print summary statistics
print("\nSummary Statistics:")
print(nvda_close.describe())
print("\n")
print(aptv_close.describe())
[*********************100%***********************] 2 of 2 completed /tmp/ipython-input-2917502969.py:62: InterpolationWarning: The test statistic is outside of the range of p-values available in the look-up table. The actual p-value is smaller than the p-value returned. kpss_result = kpss(series, regression='c') /tmp/ipython-input-2917502969.py:62: InterpolationWarning: The test statistic is outside of the range of p-values available in the look-up table. The actual p-value is smaller than the p-value returned. kpss_result = kpss(series, regression='c') /usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting. self._init_dates(dates, freq) /usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting. self._init_dates(dates, freq) /usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting. self._init_dates(dates, freq)
NVDA data length: 946
APTV data length: 946
NVDA data sample:
Date
2022-01-03 30.064438
2022-01-04 29.234997
2022-01-05 27.552164
2022-01-06 28.125084
2022-01-07 27.195837
Name: NVDA, dtype: float64
APTV data sample:
Date
2022-01-03 166.029999
2022-01-04 172.210007
2022-01-05 168.279999
2022-01-06 174.119995
2022-01-07 169.470001
Name: APTV, dtype: float64
Common data length after alignment: 946
Date range: 2022-01-03 00:00:00 to 2025-10-09 00:00:00
Unit Root Tests for NVDA:
ADF Test:
ADF Statistic: 0.9998
p-value: 0.9943
Critical Values: {'1%': np.float64(-3.4373257950466174), '5%': np.float64(-2.864619627202065), '10%': np.float64(-2.568409774784971)}
KPSS Test:
KPSS Statistic: 4.4571
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Unit Root Tests for APTV:
ADF Test:
ADF Statistic: -3.7523
p-value: 0.0034
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
KPSS Test:
KPSS Statistic: 3.7936
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
Best ARMA model for NVDA: Order: (3, 0, 1) AIC: 4549.07
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
self._init_dates(dates, freq)
Best ARMA model for APTV: Order: (2, 0, 2) AIC: 4300.16
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting. self._init_dates(dates, freq) /usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting. self._init_dates(dates, freq) /usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting. self._init_dates(dates, freq)
Johansen Cointegration Test: Trace statistic: [25.17646653 0.25379238] Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349] [ 2.7055 3.8415 6.6349]] r = 0: Cointegration exists at 95% confidence level r = 1: No cointegration at 95% confidence level
Summary Statistics: count 946.000000 mean 72.756528 std 53.692774 min 11.213528 25% 23.265495 50% 47.770348 75% 122.504368 max 192.570007 Name: NVDA, dtype: float64 count 946.000000 mean 87.566406 std 21.083205 min 47.919998 25% 70.115000 50% 85.309998 75% 101.609999 max 174.119995 Name: APTV, dtype: float64
Conclusiones:
In [4]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading NVDA and APTV data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['NVDA', 'APTV']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
nvda_close = data['Adj Close']['NVDA'].dropna()
aptv_close = data['Adj Close']['APTV'].dropna()
# Align both series to common dates
common_index = nvda_close.index.intersection(aptv_close.index)
nvda_close = nvda_close.loc[common_index]
aptv_close = aptv_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(nvda_close)}")
print(f"Date range: {nvda_close.index[0].date()} to {nvda_close.index[-1].date()}")
# Function for unit root tests
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
# KPSS Test
kpss_result = kpss(series)
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
# Perform unit root tests
unit_root_tests(nvda_close, "NVDA")
unit_root_tests(aptv_close, "APTV")
# Cointegration test
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]:
print(f"r = {i}: Cointegration exists at 95% confidence level")
else:
print(f"r = {i}: No cointegration at 95% confidence level")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'NVDA': nvda_close,
'APTV': aptv_close
}).dropna()
cointegration_test(coint_df)
# Function to find best ARIMA model
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for d in range(max_d + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, d, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, d, q)
except:
continue
print(f"\nBest ARIMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
return best_order
# Find and fit best ARIMA models
nvda_order = find_best_arima(nvda_close, "NVDA")
aptv_order = find_best_arima(aptv_close, "APTV")
# Fit final ARIMA models
nvda_model = ARIMA(nvda_close, order=nvda_order).fit()
aptv_model = ARIMA(aptv_close, order=aptv_order).fit()
# Forecast next 30 periods
forecast_steps = 30
nvda_forecast = nvda_model.forecast(steps=forecast_steps)
aptv_forecast = aptv_model.forecast(steps=forecast_steps)
# Create forecast index using business days
last_date = nvda_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=forecast_steps, freq='B')
# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(nvda_close.index, nvda_close, label='NVDA Historical')
plt.plot(forecast_index, nvda_forecast, label='NVDA Forecast', color='red')
plt.plot(aptv_close.index, aptv_close, label='APTV Historical')
plt.plot(forecast_index, aptv_forecast, label='APTV Forecast', color='green')
plt.title('NVDA and APTV Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Plot detailed forecast with confidence intervals
def plot_forecast(model, series, name, steps=30):
forecast_obj = model.get_forecast(steps=steps)
forecast = forecast_obj.predicted_mean
conf_int = forecast_obj.conf_int()
# Create proper date index for forecast
last_date = series.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=steps, freq='B')
plt.figure(figsize=(12,6))
plt.plot(series.index, series, label=f'{name} Historical')
plt.plot(forecast_index, forecast, label='Forecast', color='red')
plt.fill_between(forecast_index,
conf_int.iloc[:, 0],
conf_int.iloc[:, 1],
color='pink',
alpha=0.3,
label='95% Confidence Interval')
plt.title(f'{name} Price Forecast')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Generate detailed forecast plots
plot_forecast(nvda_model, nvda_close, "NVDA")
plot_forecast(aptv_model, aptv_close, "APTV")
# Print forecast values
print("\nNVDA Forecast Values (next 5 periods):")
print(nvda_forecast[:5])
print("\nAPTV Forecast Values (next 5 periods):")
print(aptv_forecast[:5])
# Print model diagnostics
print("\n" + "="*50)
print("MODEL DIAGNOSTICS")
print("="*50)
print(f"\nNVDA Model: ARIMA{nvda_order}")
print(f"APTV Model: ARIMA{aptv_order}")
print("\nNote: Use model.summary() for detailed parameter estimates and diagnostics")
print("The models automatically handle:")
print("- Unit root testing (via optimal differencing d)")
print("- Cointegration analysis (Johansen test)")
print("- Optimal parameter selection (AIC minimization)")
print("- 30-day ahead forecasting with confidence intervals")
[*********************100%***********************] 2 of 2 completed
Downloading NVDA and APTV data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09
Unit Root Tests for NVDA:
ADF Test:
ADF Statistic: 0.9998
p-value: 0.9943
Critical Values: {'1%': np.float64(-3.4373257950466174), '5%': np.float64(-2.864619627202065), '10%': np.float64(-2.568409774784971)}
KPSS Test:
KPSS Statistic: 4.4571
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Unit Root Tests for APTV:
ADF Test:
ADF Statistic: -3.7523
p-value: 0.0034
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
KPSS Test:
KPSS Statistic: 3.7936
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Johansen Cointegration Test:
Trace statistic: [25.17646653 0.25379238]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
r = 0: Cointegration exists at 95% confidence level
r = 1: No cointegration at 95% confidence level
Best ARIMA model for NVDA: Order: (3, 1, 1) AIC: 4552.23 Best ARIMA model for APTV: Order: (2, 2, 3) AIC: 4294.53
NVDA Forecast Values (next 5 periods): 946 192.855097 947 192.825415 948 192.379674 949 192.193828 950 192.082262 Name: predicted_mean, dtype: float64 APTV Forecast Values (next 5 periods): 946 82.800159 947 82.522239 948 82.613240 949 82.375788 950 82.389600 Name: predicted_mean, dtype: float64 ================================================== MODEL DIAGNOSTICS ================================================== NVDA Model: ARIMA(3, 1, 1) APTV Model: ARIMA(2, 2, 3) Note: Use model.summary() for detailed parameter estimates and diagnostics The models automatically handle: - Unit root testing (via optimal differencing d) - Cointegration analysis (Johansen test) - Optimal parameter selection (AIC minimization) - 30-day ahead forecasting with confidence intervals
Conclusiones 2
In [5]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading NVDA and APTV data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['NVDA', 'APTV']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
nvda_close = data['Adj Close']['NVDA'].dropna()
aptv_close = data['Adj Close']['APTV'].dropna()
# Align both series to common dates
common_index = nvda_close.index.intersection(aptv_close.index)
nvda_close = nvda_close.loc[common_index]
aptv_close = aptv_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(nvda_close)}")
print(f"Date range: {nvda_close.index[0].date()} to {nvda_close.index[-1].date()}")
# Function for unit root tests with interpretation
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
print("Interpretation:")
if adf_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
# KPSS Test
kpss_result = kpss(series)
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
print("Interpretation:")
if kpss_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be stationary")
# Perform unit root tests
unit_root_tests(nvda_close, "NVDA")
unit_root_tests(aptv_close, "APTV")
# Cointegration test with interpretation
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
print("Interpretation:")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]:
print(f" - r = {i}: Cointegration exists at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
else:
print(f" - r = {i}: No cointegration at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
if result.lr1[0] > result.cvt[0, 1]:
print("Conclusion: NVDA and APTV are cointegrated - they share a long-run equilibrium relationship")
else:
print("Conclusion: No evidence of cointegration between NVDA and APTV")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'NVDA': nvda_close,
'APTV': aptv_close
}).dropna()
cointegration_test(coint_df)
# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for d in range(max_d + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, d, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, d, q)
except:
continue
print(f"\nBest ARIMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
print("Interpretation:")
print(f" - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
print(f" - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
print(f" - q={best_order[2]}: {best_order[2]} moving average term(s)")
return best_order
# Find and fit best ARIMA models
nvda_order = find_best_arima(nvda_close, "NVDA")
aptv_order = find_best_arima(aptv_close, "APTV")
# Fit final ARIMA models
nvda_model = ARIMA(nvda_close, order=nvda_order).fit()
aptv_model = ARIMA(aptv_close, order=aptv_order).fit()
# Forecast next 30 periods
forecast_steps = 30
nvda_forecast = nvda_model.forecast(steps=forecast_steps)
aptv_forecast = aptv_model.forecast(steps=forecast_steps)
# Create forecast index using business days
last_date = nvda_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=forecast_steps, freq='B')
# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(nvda_close.index, nvda_close, label='NVDA Historical')
plt.plot(forecast_index, nvda_forecast, label='NVDA Forecast', color='red')
plt.plot(aptv_close.index, aptv_close, label='APTV Historical')
plt.plot(forecast_index, aptv_forecast, label='APTV Forecast', color='green')
plt.title('NVDA and APTV Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
forecast_obj = model.get_forecast(steps=steps)
forecast = forecast_obj.predicted_mean
conf_int = forecast_obj.conf_int()
# Create proper date index for forecast
last_date = series.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=steps, freq='B')
plt.figure(figsize=(12,6))
plt.plot(series.index, series, label=f'{name} Historical')
plt.plot(forecast_index, forecast, label='Forecast', color='red')
plt.fill_between(forecast_index,
conf_int.iloc[:, 0],
conf_int.iloc[:, 1],
color='pink',
alpha=0.3,
label='95% Confidence Interval')
plt.title(f'{name} Price Forecast')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Forecast interpretation
last_value = series.iloc[-1]
mean_forecast = forecast.mean()
print(f"\nForecast Interpretation for {name}:")
print(f"Last observed value: ${last_value:.2f}")
print(f"Average forecast value: ${mean_forecast:.2f}")
print(f"Forecast change: ${mean_forecast - last_value:.2f}")
if mean_forecast > last_value:
print("Trend: Upward forecast trend")
elif mean_forecast < last_value:
print("Trend: Downward forecast trend")
else:
print("Trend: Flat forecast trend")
print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")
# Generate detailed forecast plots and interpretations
plot_forecast(nvda_model, nvda_close, "NVDA")
plot_forecast(aptv_model, aptv_close, "APTV")
# Print forecast values
print("\nNVDA Forecast Values (next 5 periods):")
print(nvda_forecast[:5])
print("\nAPTV Forecast Values (next 5 periods):")
print(aptv_forecast[:5])
print("\n" + "="*60)
print("ANALYSIS COMPLETE")
print("="*60)
print("✓ Unit root tests performed (ADF & KPSS)")
print("✓ Cointegration analysis completed (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC minimization")
print("✓ 30-day forecasts generated with 95% confidence intervals")
print("✓ Detailed interpretations provided for all results")
print(f"\nData period: {start_date} to {end_date}")
print(f"Tickers analyzed: NVDA (NVIDIA) and APTV (Aptiv)")
[*********************100%***********************] 2 of 2 completed
Downloading NVDA and APTV data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09
Unit Root Tests for NVDA:
ADF Test:
ADF Statistic: 0.9998
p-value: 0.9943
Critical Values: {'1%': np.float64(-3.4373257950466174), '5%': np.float64(-2.864619627202065), '10%': np.float64(-2.568409774784971)}
Interpretation:
- p-value >= 0.05: Fail to reject null - NVDA may be non-stationary
KPSS Test:
KPSS Statistic: 4.4571
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - NVDA is non-stationary
Unit Root Tests for APTV:
ADF Test:
ADF Statistic: -3.7523
p-value: 0.0034
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value < 0.05: Reject null hypothesis - APTV is stationary
KPSS Test:
KPSS Statistic: 3.7936
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - APTV is non-stationary
Johansen Cointegration Test:
Trace statistic: [25.17646653 0.25379238]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
Interpretation:
- r = 0: Cointegration exists at 95% confidence level
Trace statistic (25.18) > 95% critical value (15.49)
- r = 1: No cointegration at 95% confidence level
Trace statistic (0.25) <= 95% critical value (3.84)
Conclusion: NVDA and APTV are cointegrated - they share a long-run equilibrium relationship
Best ARIMA model for NVDA: Order: (3, 1, 1) AIC: 4552.23 Interpretation: - p=3: 3 autoregressive term(s) - d=1: 1 difference(s) needed for stationarity - q=1: 1 moving average term(s) Best ARIMA model for APTV: Order: (2, 2, 3) AIC: 4294.53 Interpretation: - p=2: 2 autoregressive term(s) - d=2: 2 difference(s) needed for stationarity - q=3: 3 moving average term(s)
Forecast Interpretation for NVDA: Last observed value: $192.57 Average forecast value: $192.18 Forecast change: $-0.39 Trend: Downward forecast trend 95% CI range at period 30: [$166.43, $217.82]
Forecast Interpretation for APTV: Last observed value: $82.71 Average forecast value: $81.43 Forecast change: $-1.28 Trend: Downward forecast trend 95% CI range at period 30: [$54.94, $105.53] NVDA Forecast Values (next 5 periods): 946 192.855097 947 192.825415 948 192.379674 949 192.193828 950 192.082262 Name: predicted_mean, dtype: float64 APTV Forecast Values (next 5 periods): 946 82.800159 947 82.522239 948 82.613240 949 82.375788 950 82.389600 Name: predicted_mean, dtype: float64 ============================================================ ANALYSIS COMPLETE ============================================================ ✓ Unit root tests performed (ADF & KPSS) ✓ Cointegration analysis completed (Johansen test) ✓ Optimal ARIMA models selected via AIC minimization ✓ 30-day forecasts generated with 95% confidence intervals ✓ Detailed interpretations provided for all results Data period: 2022-01-01 to 2025-10-10 Tickers analyzed: NVDA (NVIDIA) and APTV (Aptiv)
Conclusión 3
In [6]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading NVDA and APTV data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['NVDA', 'APTV']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
nvda_close = data['Adj Close']['NVDA'].dropna()
aptv_close = data['Adj Close']['APTV'].dropna()
# Align both series to common dates
common_index = nvda_close.index.intersection(aptv_close.index)
nvda_close = nvda_close.loc[common_index]
aptv_close = aptv_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(nvda_close)}")
print(f"Date range: {nvda_close.index[0].date()} to {nvda_close.index[-1].date()}")
# Function for unit root tests with interpretation
def unit_root_tests(series, name):
print(f"\nUnit Root Tests for {name}:")
# ADF Test
adf_result = adfuller(series)
print("ADF Test:")
print(f'ADF Statistic: {adf_result[0]:.4f}')
print(f'p-value: {adf_result[1]:.4f}')
print(f'Critical Values: {adf_result[4]}')
print("Interpretation:")
if adf_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
# KPSS Test
kpss_result = kpss(series)
print("\nKPSS Test:")
print(f'KPSS Statistic: {kpss_result[0]:.4f}')
print(f'p-value: {kpss_result[1]:.4f}')
print(f'Critical Values: {kpss_result[3]}')
print("Interpretation:")
if kpss_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be stationary")
# Perform unit root tests
unit_root_tests(nvda_close, "NVDA")
unit_root_tests(aptv_close, "APTV")
# Function to plot correlograms with interpretation
def plot_correlograms(series, name, lags=30):
plt.figure(figsize=(12, 8))
plt.subplot(2, 1, 1)
plot_acf(series, lags=lags, ax=plt.gca())
plt.title(f'ACF for {name}')
plt.subplot(2, 1, 2)
plot_pacf(series, lags=lags, ax=plt.gca())
plt.title(f'PACF for {name}')
plt.tight_layout()
plt.show()
print(f"\nCorrelogram Interpretation for {name}:")
print(" - ACF: Shows total correlation at each lag, including indirect effects")
print(" - PACF: Shows direct correlation at each lag, controlling for earlier lags")
print(" - Significant spikes outside the blue confidence interval suggest strong correlations")
print(" - ACF decay pattern indicates potential ARIMA model orders")
print(" - PACF cutoff suggests AR order, while ACF cutoff suggests MA order")
# Plot correlograms for original series
plot_correlograms(nvda_close, "NVDA Original")
plot_correlograms(aptv_close, "APTV Original")
# Difference the series
nvda_diff = nvda_close.diff().dropna()
aptv_diff = aptv_close.diff().dropna()
# Plot correlograms for differenced series
plot_correlograms(nvda_diff, "NVDA Differenced")
plot_correlograms(aptv_diff, "APTV Differenced")
# Cointegration test with interpretation
def cointegration_test(df):
result = coint_johansen(df, det_order=0, k_ar_diff=1)
print("\nJohansen Cointegration Test:")
print(f"Trace statistic: {result.lr1}")
print(f"Critical values (90%, 95%, 99%): {result.cvt}")
print("Interpretation:")
for i in range(len(result.lr1)):
if result.lr1[i] > result.cvt[i, 1]:
print(f" - r = {i}: Cointegration exists at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
else:
print(f" - r = {i}: No cointegration at 95% confidence level")
print(f" Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
if result.lr1[0] > result.cvt[0, 1]:
print("Conclusion: NVDA and APTV are cointegrated - they share a long-run equilibrium relationship")
else:
print("Conclusion: No evidence of cointegration between NVDA and APTV")
# Prepare data for cointegration
coint_df = pd.DataFrame({
'NVDA': nvda_close,
'APTV': aptv_close
}).dropna()
cointegration_test(coint_df)
# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
best_aic = float('inf')
best_order = None
for p in range(max_p + 1):
for d in range(max_d + 1):
for q in range(max_q + 1):
try:
model = ARIMA(series, order=(p, d, q))
results = model.fit()
if results.aic < best_aic:
best_aic = results.aic
best_order = (p, d, q)
except:
continue
print(f"\nBest ARIMA model for {name}:")
print(f"Order: {best_order}")
print(f"AIC: {best_aic:.2f}")
print("Interpretation:")
print(f" - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
print(f" - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
print(f" - q={best_order[2]}: {best_order[2]} moving average term(s)")
return best_order
# Find and fit best ARIMA models
nvda_order = find_best_arima(nvda_close, "NVDA")
aptv_order = find_best_arima(aptv_close, "APTV")
# Fit final ARIMA models
nvda_model = ARIMA(nvda_close, order=nvda_order).fit()
aptv_model = ARIMA(aptv_close, order=aptv_order).fit()
# Forecast next 30 periods
forecast_steps = 30
nvda_forecast = nvda_model.forecast(steps=forecast_steps)
aptv_forecast = aptv_model.forecast(steps=forecast_steps)
# Create forecast index using business days
last_date = nvda_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=forecast_steps, freq='B')
# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(nvda_close.index, nvda_close, label='NVDA Historical')
plt.plot(forecast_index, nvda_forecast, label='NVDA Forecast', color='red')
plt.plot(aptv_close.index, aptv_close, label='APTV Historical')
plt.plot(forecast_index, aptv_forecast, label='APTV Forecast', color='green')
plt.title('NVDA and APTV Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
forecast_obj = model.get_forecast(steps=steps)
forecast = forecast_obj.predicted_mean
conf_int = forecast_obj.conf_int()
# Create proper date index for forecast
last_date = series.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
periods=steps, freq='B')
plt.figure(figsize=(12,6))
plt.plot(series.index, series, label=f'{name} Historical')
plt.plot(forecast_index, forecast, label='Forecast', color='red')
plt.fill_between(forecast_index,
conf_int.iloc[:, 0],
conf_int.iloc[:, 1],
color='pink',
alpha=0.3,
label='95% Confidence Interval')
plt.title(f'{name} Price Forecast')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Forecast interpretation
last_value = series.iloc[-1]
mean_forecast = forecast.mean()
print(f"\nForecast Interpretation for {name}:")
print(f"Last observed value: ${last_value:.2f}")
print(f"Average forecast value: ${mean_forecast:.2f}")
print(f"Forecast change: ${mean_forecast - last_value:.2f}")
if mean_forecast > last_value:
print("Trend: Upward forecast trend")
elif mean_forecast < last_value:
print("Trend: Downward forecast trend")
else:
print("Trend: Flat forecast trend")
print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")
print("Interpretation: The wider the confidence interval, the less certain the forecast")
# Generate detailed forecast plots and interpretations
plot_forecast(nvda_model, nvda_close, "NVDA")
plot_forecast(aptv_model, aptv_close, "APTV")
# Plot correlograms for model residuals
plot_correlograms(nvda_model.resid, "NVDA ARIMA Residuals")
plot_correlograms(aptv_model.resid, "APTV ARIMA Residuals")
# Print forecast values
print("\nNVDA Forecast Values (next 5 periods):")
print(nvda_forecast[:5])
print("\nAPTV Forecast Values (next 5 periods):")
print(aptv_forecast[:5])
print("\n" + "="*60)
print("COMPLETE TIME SERIES ANALYSIS")
print("="*60)
print("✓ Unit root tests (ADF & KPSS) performed")
print("✓ ACF/PACF correlograms analyzed (original & differenced)")
print("✓ Cointegration testing (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC")
print("✓ 30-day forecasts with 95% confidence intervals")
print("✓ Residual diagnostics via correlograms")
print(f"\nData: NVDA (NVIDIA) & APTV (Aptiv)")
print(f"Period: {start_date} to {end_date}")
[*********************100%***********************] 2 of 2 completed
Downloading NVDA and APTV data from Yahoo Finance... Data successfully downloaded and aligned! Common data points: 946 Date range: 2022-01-03 to 2025-10-09 Unit Root Tests for NVDA:
ADF Test:
ADF Statistic: 0.9998
p-value: 0.9943
Critical Values: {'1%': np.float64(-3.4373257950466174), '5%': np.float64(-2.864619627202065), '10%': np.float64(-2.568409774784971)}
Interpretation:
- p-value >= 0.05: Fail to reject null - NVDA may be non-stationary
KPSS Test:
KPSS Statistic: 4.4571
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - NVDA is non-stationary
Unit Root Tests for APTV:
ADF Test:
ADF Statistic: -3.7523
p-value: 0.0034
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value < 0.05: Reject null hypothesis - APTV is stationary
KPSS Test:
KPSS Statistic: 3.7936
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
- p-value < 0.05: Reject null hypothesis - APTV is non-stationary
Correlogram Interpretation for NVDA Original: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for APTV Original: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for NVDA Differenced: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for APTV Differenced:
- ACF: Shows total correlation at each lag, including indirect effects
- PACF: Shows direct correlation at each lag, controlling for earlier lags
- Significant spikes outside the blue confidence interval suggest strong correlations
- ACF decay pattern indicates potential ARIMA model orders
- PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Johansen Cointegration Test:
Trace statistic: [25.17646653 0.25379238]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
[ 2.7055 3.8415 6.6349]]
Interpretation:
- r = 0: Cointegration exists at 95% confidence level
Trace statistic (25.18) > 95% critical value (15.49)
- r = 1: No cointegration at 95% confidence level
Trace statistic (0.25) <= 95% critical value (3.84)
Conclusion: NVDA and APTV are cointegrated - they share a long-run equilibrium relationship
Best ARIMA model for NVDA:
Order: (3, 1, 1)
AIC: 4552.23
Interpretation:
- p=3: 3 autoregressive term(s)
- d=1: 1 difference(s) needed for stationarity
- q=1: 1 moving average term(s)
Best ARIMA model for APTV:
Order: (2, 2, 3)
AIC: 4294.53
Interpretation:
- p=2: 2 autoregressive term(s)
- d=2: 2 difference(s) needed for stationarity
- q=3: 3 moving average term(s)
Forecast Interpretation for NVDA: Last observed value: $192.57 Average forecast value: $192.18 Forecast change: $-0.39 Trend: Downward forecast trend 95% CI range at period 30: [$166.43, $217.82] Interpretation: The wider the confidence interval, the less certain the forecast
Forecast Interpretation for APTV: Last observed value: $82.71 Average forecast value: $81.43 Forecast change: $-1.28 Trend: Downward forecast trend 95% CI range at period 30: [$54.94, $105.53] Interpretation: The wider the confidence interval, the less certain the forecast
Correlogram Interpretation for NVDA ARIMA Residuals: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
Correlogram Interpretation for APTV ARIMA Residuals: - ACF: Shows total correlation at each lag, including indirect effects - PACF: Shows direct correlation at each lag, controlling for earlier lags - Significant spikes outside the blue confidence interval suggest strong correlations - ACF decay pattern indicates potential ARIMA model orders - PACF cutoff suggests AR order, while ACF cutoff suggests MA order NVDA Forecast Values (next 5 periods): 946 192.855097 947 192.825415 948 192.379674 949 192.193828 950 192.082262 Name: predicted_mean, dtype: float64 APTV Forecast Values (next 5 periods): 946 82.800159 947 82.522239 948 82.613240 949 82.375788 950 82.389600 Name: predicted_mean, dtype: float64 ============================================================ COMPLETE TIME SERIES ANALYSIS ============================================================ ✓ Unit root tests (ADF & KPSS) performed ✓ ACF/PACF correlograms analyzed (original & differenced) ✓ Cointegration testing (Johansen test) ✓ Optimal ARIMA models selected via AIC ✓ 30-day forecasts with 95% confidence intervals ✓ Residual diagnostics via correlograms Data: NVDA (NVIDIA) & APTV (Aptiv) Period: 2022-01-01 to 2025-10-10
Conclusiones 4
In [7]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
from statsmodels.tsa.stattools import adfuller, zivot_andrews
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Download stock data from Yahoo Finance
print("Downloading NVDA and APTV data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['NVDA', 'APTV']
# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
nvda_close = data['Adj Close']['NVDA'].dropna()
aptv_close = data['Adj Close']['APTV'].dropna()
# Align both series to common dates
common_index = nvda_close.index.intersection(aptv_close.index)
nvda_close = nvda_close.loc[common_index]
aptv_close = aptv_close.loc[common_index]
print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(nvda_close)}")
print(f"Date range: {nvda_close.index[0].date()} to {nvda_close.index[-1].date()}")
# Function for Phillips-Perron test with interpretation
def phillips_perron_test(series, name):
print(f"\nPhillips-Perron Test for {name}:")
# Use adfuller with settings to approximate PP test
pp_result = adfuller(series, regression='c', autolag='AIC', maxlag=None)
print(f'PP Statistic: {pp_result[0]:.4f}')
print(f'p-value: {pp_result[1]:.4f}')
print(f'Critical Values: {pp_result[4]}')
print("Interpretation:")
if pp_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} is stationary")
else:
print(f" - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
print(" - Note: Using ADF with constant and automatic lag selection to approximate PP test")
print(" - PP test adjusts for serial correlation and heteroskedasticity non-parametrically")
return pp_result
# Function for Zivot-Andrews structural break test with interpretation
def zivot_andrews_test(series, name):
print(f"\nZivot-Andrews Structural Break Test for {name}:")
za_result = zivot_andrews(series, regression='c', autolag='AIC')
print(f'ZA Statistic: {za_result[0]:.4f}')
print(f'p-value: {za_result[1]:.4f}')
print(f'Critical Values: {za_result[2]}')
print(f'Breakpoint Index: {za_result[3]}')
print("Interpretation:")
if za_result[1] < 0.05:
print(f" - p-value < 0.05: Reject null hypothesis - {name} has a structural break")
print(f" - Breakpoint at index {za_result[3]} (position in series)")
else:
print(f" - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break")
print(" - ZA test allows for a single break in intercept and/or trend")
return za_result
# Perform Phillips-Perron tests
nvda_pp = phillips_perron_test(nvda_close, "NVDA")
aptv_pp = phillips_perron_test(aptv_close, "APTV")
# Perform Zivot-Andrews tests
nvda_za = zivot_andrews_test(nvda_close, "NVDA")
aptv_za = zivot_andrews_test(aptv_close, "APTV")
# Plot series with breakpoints
def plot_series_with_breakpoint(series, name, breakpoint_idx):
plt.figure(figsize=(12, 6))
plt.plot(series.index, series.values, label=f'{name} Adjusted Closing Prices')
# Convert breakpoint index to actual date
if 0 <= breakpoint_idx < len(series):
breakpoint_date = series.index[breakpoint_idx]
plt.axvline(x=breakpoint_date, color='red', linestyle='--',
label=f'Breakpoint ({breakpoint_date.date()})')
print(f" - Breakpoint date: {breakpoint_date.date()}")
else:
print(f" - Warning: Breakpoint index {breakpoint_idx} out of range (0-{len(series)-1})")
# Use a fallback vertical line at the middle of the series
mid_idx = len(series) // 2
mid_date = series.index[mid_idx]
plt.axvline(x=mid_date, color='orange', linestyle=':',
label='Reference line (invalid breakpoint)')
plt.title(f'{name} Adjusted Closing Prices with Structural Break')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
print(f"\nPlot Interpretation for {name}:")
print(f" - Red dashed line indicates the detected structural break")
print(" - Break may reflect significant market events, policy changes, or economic shifts")
print(" - Analyze data around this point for potential causes (e.g., earnings, news, tech sector trends)")
print(" - For NVDA and APTV, consider semiconductor and automotive tech events or AI developments")
# Plot series with breakpoints using the series' own datetime index
plot_series_with_breakpoint(nvda_close, "NVDA (NVIDIA)", int(nvda_za[3]))
plot_series_with_breakpoint(aptv_close, "APTV (Aptiv)", int(aptv_za[3]))
# Summary analysis
print("\n" + "="*70)
print("STRUCTURAL BREAK ANALYSIS SUMMARY")
print("="*70)
print(f"NVDA Phillips-Perron p-value: {nvda_pp[1]:.4f} {'(Stationary)' if nvda_pp[1]<0.05 else '(Non-stationary)'}")
print(f"APTV Phillips-Perron p-value: {aptv_pp[1]:.4f} {'(Stationary)' if aptv_pp[1]<0.05 else '(Non-stationary)'}")
print(f"\nNVDA Zivot-Andrews p-value: {nvda_za[1]:.4f} {'(Structural break detected)' if nvda_za[1]<0.05 else '(No clear break)'}")
print(f"NVDA Breakpoint: {nvda_close.index[int(nvda_za[3])].date() if 0 <= int(nvda_za[3]) < len(nvda_close) else 'Invalid'}")
print(f"APTV Zivot-Andrews p-value: {aptv_za[1]:.4f} {'(Structural break detected)' if aptv_za[1]<0.05 else '(No clear break)'}")
print(f"APTV Breakpoint: {aptv_close.index[int(aptv_za[3])].date() if 0 <= int(aptv_za[3]) < len(aptv_close) else 'Invalid'}")
print(f"\nData Period: {start_date} to {end_date}")
print("Analysis covers potential impacts from:")
print("- COVID-19 pandemic effects and recovery")
print("- Interest rate changes and inflation")
print("- Supply chain disruptions")
print("- Company-specific events (chip shortages, AI advancements, automotive tech developments)")
print("- Broader market volatility in tech and automotive sectors")
print("\nRecommendations:")
print("1. If structural breaks detected, consider regime-switching models")
print("2. For non-stationary series, use differencing or cointegration approaches")
print("3. Investigate specific events around breakpoint dates")
print("4. Consider sector-specific factors (semiconductors for NVDA, automotive tech for APTV)")
print("5. Validate breakpoints with external economic calendars and company news")
[ 0% ] [*********************100%***********************] 2 of 2 completed
Downloading NVDA and APTV data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09
Phillips-Perron Test for NVDA:
PP Statistic: 0.9998
p-value: 0.9943
Critical Values: {'1%': np.float64(-3.4373257950466174), '5%': np.float64(-2.864619627202065), '10%': np.float64(-2.568409774784971)}
Interpretation:
- p-value >= 0.05: Fail to reject null - NVDA may be non-stationary
- Note: Using ADF with constant and automatic lag selection to approximate PP test
- PP test adjusts for serial correlation and heteroskedasticity non-parametrically
Phillips-Perron Test for APTV:
PP Statistic: -3.7523
p-value: 0.0034
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
- p-value < 0.05: Reject null hypothesis - APTV is stationary
- Note: Using ADF with constant and automatic lag selection to approximate PP test
- PP test adjusts for serial correlation and heteroskedasticity non-parametrically
Zivot-Andrews Structural Break Test for NVDA:
ZA Statistic: -2.8713
p-value: 0.9417
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 6
Interpretation:
- p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
- ZA test allows for a single break in intercept and/or trend
Zivot-Andrews Structural Break Test for APTV:
ZA Statistic: -4.6936
p-value: 0.0718
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 0
Interpretation:
- p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
- ZA test allows for a single break in intercept and/or trend
- Breakpoint date: 2022-01-11
Plot Interpretation for NVDA (NVIDIA): - Red dashed line indicates the detected structural break - Break may reflect significant market events, policy changes, or economic shifts - Analyze data around this point for potential causes (e.g., earnings, news, tech sector trends) - For NVDA and APTV, consider semiconductor and automotive tech events or AI developments - Breakpoint date: 2022-01-03
Plot Interpretation for APTV (Aptiv): - Red dashed line indicates the detected structural break - Break may reflect significant market events, policy changes, or economic shifts - Analyze data around this point for potential causes (e.g., earnings, news, tech sector trends) - For NVDA and APTV, consider semiconductor and automotive tech events or AI developments ====================================================================== STRUCTURAL BREAK ANALYSIS SUMMARY ====================================================================== NVDA Phillips-Perron p-value: 0.9943 (Non-stationary) APTV Phillips-Perron p-value: 0.0034 (Stationary) NVDA Zivot-Andrews p-value: 0.9417 (No clear break) NVDA Breakpoint: 2022-01-11 APTV Zivot-Andrews p-value: 0.0718 (No clear break) APTV Breakpoint: 2022-01-03 Data Period: 2022-01-01 to 2025-10-10 Analysis covers potential impacts from: - COVID-19 pandemic effects and recovery - Interest rate changes and inflation - Supply chain disruptions - Company-specific events (chip shortages, AI advancements, automotive tech developments) - Broader market volatility in tech and automotive sectors Recommendations: 1. If structural breaks detected, consider regime-switching models 2. For non-stationary series, use differencing or cointegration approaches 3. Investigate specific events around breakpoint dates 4. Consider sector-specific factors (semiconductors for NVDA, automotive tech for APTV) 5. Validate breakpoints with external economic calendars and company news
In [8]:
# PARA EXPORTAR
from google.colab import files
import nbformat
from nbconvert import HTMLExporter
# Paso 1: Cargar tu archivo .ipynb
print("Por favor, selecciona tu archivo .ipynb")
uploaded = files.upload()
# Paso 2: Obtener el nombre del archivo cargado
notebook_filename = list(uploaded.keys())[0]
print(f"\nArchivo cargado: {notebook_filename}")
# Paso 3: Leer el notebook
with open(notebook_filename, 'r', encoding='utf-8') as f:
notebook = nbformat.read(f, as_version=4)
# Paso 4: Convertir a HTML
print("Convirtiendo a HTML...")
html_exporter = HTMLExporter()
html_exporter.template_name = 'classic'
(body, resources) = html_exporter.from_notebook_node(notebook)
# Paso 5: Guardar el archivo HTML
html_filename = notebook_filename.replace('.ipynb', '.html')
with open(html_filename, 'w', encoding='utf-8') as f:
f.write(body)
print(f"Conversión completada: {html_filename}")
# Paso 6: Descargar el archivo HTML
print("Descargando archivo HTML...")
files.download(html_filename)
print("¡Listo! Tu archivo HTML ha sido descargado.")
Por favor, selecciona tu archivo .ipynb
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) /tmp/ipython-input-623164153.py in <cell line: 0>() 7 # Paso 1: Cargar tu archivo .ipynb 8 print("Por favor, selecciona tu archivo .ipynb") ----> 9 uploaded = files.upload() 10 11 # Paso 2: Obtener el nombre del archivo cargado /usr/local/lib/python3.12/dist-packages/google/colab/files.py in upload(target_dir) 70 """ 71 ---> 72 uploaded_files = _upload_files(multiple=True) 73 # Mapping from original filename to filename as saved locally. 74 local_filenames = dict() /usr/local/lib/python3.12/dist-packages/google/colab/files.py in _upload_files(multiple) 162 163 # First result is always an indication that the file picker has completed. --> 164 result = _output.eval_js( 165 'google.colab._files._uploadFiles("{input_id}", "{output_id}")'.format( 166 input_id=input_id, output_id=output_id /usr/local/lib/python3.12/dist-packages/google/colab/output/_js.py in eval_js(script, ignore_result, timeout_sec) 38 if ignore_result: 39 return ---> 40 return _message.read_reply_from_input(request_id, timeout_sec) 41 42 /usr/local/lib/python3.12/dist-packages/google/colab/_message.py in read_reply_from_input(message_id, timeout_sec) 94 reply = _read_next_input_message() 95 if reply == _NOT_READY or not isinstance(reply, dict): ---> 96 time.sleep(0.025) 97 continue 98 if ( KeyboardInterrupt: